From 857c2b1886684f62724c8e1464e151773a7573d0 Mon Sep 17 00:00:00 2001
From: David Herberth
Date: Wed, 30 Oct 2024 17:11:17 +0100
Subject: [PATCH] ref(project-cache): Split project cache into shared and private parts (#4147)

Makes the project cache concurrently accessible using a lock-free shared map.

- Removes support for `no_cache` queries; they have not been used for a long
  time, and dropping them simplifies the design (project accesses no longer
  need to be awaitable).
- Removes support for the v2 project cache endpoint; v3 has been out for more
  than 2 years.

The PR also moves all project cache handling out of the old (now called
legacy) project cache into a new, simplified project cache which only does the
book-keeping for the actual project cache. The old Spool V1 implementation and
some message handling remain in the legacy project cache. Spool V1 will
eventually be removed; the remaining messages no longer need to be messages
and can be replaced and removed in follow-up PRs. Some messages, like
`ProcessMetrics`, have already been removed.

The garbage disposal has been removed. With the new organization we generate
much less 'trash' overall that needs to be disposed of, and it is no longer
necessary to offload disposal to another thread: the service itself handles
far less volume, and service latency is no longer a major concern for
stability.

It is now possible to subscribe to project cache events via a Tokio broadcast
channel. The channel is inherently size limited, which is problematic because
subscribers currently cannot deal with lag and must not miss any messages. To
prevent this (until we have a better way), the channel is set to a maximum
size which should theoretically be impossible to reach. This is described in
more detail in the code itself.

Functional changes:

- Projects are only expired through the eviction mechanism; they are no longer
  expired on access. It is therefore possible to configure Relay in a way
  where expired projects are used if the eviction interval is too long. To
  compensate for this, the config has been tuned: ideally the eviction
  interval is shorter than the grace period.
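For illustration, a minimal sketch of such a subscriber, based on the
`ProjectCacheHandle::changes()` API and `ProjectChange` events introduced in
this PR (the function name and log messages are illustrative only):

```rust
use tokio::sync::broadcast::error::RecvError;

use crate::services::projects::cache::{ProjectCacheHandle, ProjectChange};

/// Logs project cache changes until the broadcast channel is closed.
async fn watch_project_changes(handle: ProjectCacheHandle) {
    // `changes()` subscribes to the tokio broadcast channel described above.
    let mut changes = handle.changes();

    loop {
        match changes.recv().await {
            Ok(ProjectChange::Ready(project_key)) => {
                // The project state became available or was updated; it can
                // now be read synchronously via `handle.get(project_key)`.
                relay_log::trace!("project ready: {:?}", project_key);
            }
            Ok(ProjectChange::Evicted(project_key)) => {
                relay_log::trace!("project evicted: {:?}", project_key);
            }
            // The channel is sized so that lag should be practically
            // impossible, but subscribers still have to handle the variant.
            Err(RecvError::Lagged(skipped)) => {
                relay_log::error!("project change subscriber lagged by {} events", skipped);
            }
            Err(RecvError::Closed) => break,
        }
    }
}
```

This mirrors how the envelope buffer service consumes `ProjectChange::Ready`
events in this PR.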
--- .github/workflows/ci.yml | 2 +- CHANGELOG.md | 1 + Cargo.lock | 93 +- Cargo.toml | 2 + relay-config/src/config.rs | 14 +- relay-dynamic-config/src/error_boundary.rs | 8 + relay-quotas/src/rate_limit.rs | 40 +- relay-sampling/src/evaluation.rs | 2 +- relay-server/Cargo.toml | 2 + relay-server/src/endpoints/batch_metrics.rs | 3 +- relay-server/src/endpoints/common.rs | 24 +- relay-server/src/endpoints/project_configs.rs | 81 +- relay-server/src/service.rs | 71 +- relay-server/src/services/buffer/mod.rs | 75 +- relay-server/src/services/processor.rs | 249 +++--- .../src/services/processor/metrics.rs | 2 +- .../src/services/processor/span/processing.rs | 2 +- .../src/services/projects/cache/handle.rs | 104 +++ .../projects/{cache.rs => cache/legacy.rs} | 787 ++--------------- .../src/services/projects/cache/mod.rs | 10 + .../src/services/projects/cache/project.rs | 299 +++++++ .../src/services/projects/cache/service.rs | 225 +++++ .../src/services/projects/cache/state.rs | 769 +++++++++++++++++ .../projects/project/{state => }/info.rs | 58 +- .../src/services/projects/project/mod.rs | 810 ++---------------- .../projects/project/state/fetch_state.rs | 162 ---- .../services/projects/project/state/mod.rs | 126 --- .../src/services/projects/source/mod.rs | 60 +- .../src/services/projects/source/redis.rs | 20 +- .../src/services/projects/source/upstream.rs | 49 +- relay-server/src/services/spooler/mod.rs | 18 +- relay-server/src/statsd.rs | 59 +- relay-server/src/testutils.rs | 5 +- relay-server/src/utils/garbage.rs | 113 --- relay-server/src/utils/mod.rs | 2 - tests/integration/test_projectconfigs.py | 162 ++-- tests/integration/test_query.py | 28 +- 37 files changed, 2198 insertions(+), 2339 deletions(-) create mode 100644 relay-server/src/services/projects/cache/handle.rs rename relay-server/src/services/projects/{cache.rs => cache/legacy.rs} (52%) create mode 100644 relay-server/src/services/projects/cache/mod.rs create mode 100644 relay-server/src/services/projects/cache/project.rs create mode 100644 relay-server/src/services/projects/cache/service.rs create mode 100644 relay-server/src/services/projects/cache/state.rs rename relay-server/src/services/projects/project/{state => }/info.rs (86%) delete mode 100644 relay-server/src/services/projects/project/state/fetch_state.rs delete mode 100644 relay-server/src/services/projects/project/state/mod.rs delete mode 100644 relay-server/src/utils/garbage.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 28548848d3..8287d3842e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -553,7 +553,7 @@ jobs: - run: make test-integration env: PYTEST_N: 6 - RELAY_VERSION_CHAIN: "20.6.0,latest" + RELAY_VERSION_CHAIN: "23.12.0,latest" sentry-relay-integration-tests: name: Sentry-Relay Integration Tests diff --git a/CHANGELOG.md b/CHANGELOG.md index bb3310320e..ba455bf728 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ **Breaking Changes**: - Removes support for metric meta envelope items. ([#4152](https://github.com/getsentry/relay/pull/4152)) +- Removes support for the project cache endpoint version 2 and before. 
([#4147](https://github.com/getsentry/relay/pull/4147)) **Bug Fixes** diff --git a/Cargo.lock b/Cargo.lock index f844e78d09..8e9e350e0d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -25,9 +25,9 @@ checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" [[package]] name = "ahash" -version = "0.8.8" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42cd52102d3df161c77a887b608d7a4897d7cc112886a9537b738a887a03aaff" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if", "getrandom", @@ -217,6 +217,16 @@ dependencies = [ "num-traits", ] +[[package]] +name = "atomic-wait" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a55b94919229f2c42292fd71ffa4b75e83193bffdd77b1e858cd55fd2d0b0ea8" +dependencies = [ + "libc", + "windows-sys 0.42.0", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -412,7 +422,7 @@ dependencies = [ "bitflags 2.4.1", "cexpr", "clang-sys", - "itertools 0.10.5", + "itertools 0.13.0", "log", "prettyplease", "proc-macro2", @@ -2695,6 +2705,16 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" +[[package]] +name = "papaya" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "674d893d27388726e4020eaa60257b3df760392003c14f46455e3277af72cd04" +dependencies = [ + "atomic-wait", + "seize", +] + [[package]] name = "parking" version = "2.2.0" @@ -2999,7 +3019,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acf0c195eebb4af52c752bec4f52f645da98b6e92077a04110c7f349477ae5ac" dependencies = [ "anyhow", - "itertools 0.10.5", + "itertools 0.13.0", "proc-macro2", "quote", "syn 2.0.77", @@ -3733,6 +3753,7 @@ dependencies = [ name = "relay-server" version = "24.10.0" dependencies = [ + "ahash", "anyhow", "arc-swap", "axum", @@ -3759,6 +3780,7 @@ dependencies = [ "minidump", "multer", "once_cell", + "papaya", "pin-project-lite", "priority-queue", "rand", @@ -4172,6 +4194,12 @@ dependencies = [ "libc", ] +[[package]] +name = "seize" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d659fa6f19e82a52ab8d3fff3c380bd8cc16462eaea411395618a38760eb85bc" + [[package]] name = "semver" version = "1.0.23" @@ -5787,6 +5815,21 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-sys" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + [[package]] name = "windows-sys" version = "0.48.0" @@ -5845,6 +5888,12 @@ dependencies = [ "windows_x86_64_msvc 0.52.6", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -5857,6 +5906,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] 
+name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -5869,6 +5924,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -5887,6 +5948,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -5899,6 +5966,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -5911,6 +5984,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -5923,6 +6002,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" diff --git a/Cargo.toml b/Cargo.toml index 2865dccce0..7b41485040 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -60,6 +60,7 @@ relay-test = { path = "relay-test" } relay-protocol-derive = { path = "relay-protocol-derive" } relay-event-derive = { path = "relay-event-derive" } +ahash = "0.8.11" android_trace_log = { version = "0.3.0", features = ["serde"] } anyhow = "1.0.66" axum = "0.7.5" @@ -119,6 +120,7 @@ num-traits = "0.2.18" num_cpus = "1.13.0" once_cell = "1.13.1" opentelemetry-proto = { version = "0.7.0", default-features = false } +papaya = "0.1.4" parking_lot = "0.12.1" path-slash = "0.2.1" pest = "2.1.3" diff --git a/relay-config/src/config.rs b/relay-config/src/config.rs index 2a98bc63a3..6f9ac83e2e 100644 --- a/relay-config/src/config.rs +++ b/relay-config/src/config.rs @@ -1017,7 +1017,9 @@ struct Cache { /// The cache timeout for project configurations in seconds. project_expiry: u32, /// Continue using project state this many seconds after cache expiry while a new state is - /// being fetched. 
This is added on top of `project_expiry` and `miss_expiry`. Default is 0. + /// being fetched. This is added on top of `project_expiry`. + /// + /// Default is 2 minutes. project_grace_period: u32, /// The cache timeout for downstream relay info (public keys) in seconds. relay_expiry: u32, @@ -1053,17 +1055,17 @@ impl Default for Cache { fn default() -> Self { Cache { project_request_full_config: false, - project_expiry: 300, // 5 minutes - project_grace_period: 0, - relay_expiry: 3600, // 1 hour - envelope_expiry: 600, // 10 minutes + project_expiry: 300, // 5 minutes + project_grace_period: 120, // 2 minutes + relay_expiry: 3600, // 1 hour + envelope_expiry: 600, // 10 minutes envelope_buffer_size: 1000, miss_expiry: 60, // 1 minute batch_interval: 100, // 100ms downstream_relays_batch_interval: 100, // 100ms batch_size: 500, file_interval: 10, // 10 seconds - eviction_interval: 60, // 60 seconds + eviction_interval: 15, // 15 seconds global_config_fetch_interval: 10, // 10 seconds } } diff --git a/relay-dynamic-config/src/error_boundary.rs b/relay-dynamic-config/src/error_boundary.rs index c80d2ab8b7..dc4948a3b0 100644 --- a/relay-dynamic-config/src/error_boundary.rs +++ b/relay-dynamic-config/src/error_boundary.rs @@ -42,6 +42,14 @@ impl ErrorBoundary { } } + /// Converts from `ErrorBoundary` to `ErrorBoundary<&T>`. + pub fn as_ref(&self) -> ErrorBoundary<&T> { + match self { + Self::Ok(t) => ErrorBoundary::Ok(t), + Self::Err(e) => ErrorBoundary::Err(Arc::clone(e)), + } + } + /// Returns the contained [`Ok`] value or computes it from a closure. #[inline] pub fn unwrap_or_else(self, op: F) -> T diff --git a/relay-quotas/src/rate_limit.rs b/relay-quotas/src/rate_limit.rs index c1162d340e..ba90bacaca 100644 --- a/relay-quotas/src/rate_limit.rs +++ b/relay-quotas/src/rate_limit.rs @@ -1,5 +1,6 @@ use std::fmt; use std::str::FromStr; +use std::sync::{Arc, Mutex, PoisonError}; use std::time::{Duration, Instant}; use relay_base_schema::metrics::MetricNamespace; @@ -279,15 +280,14 @@ impl RateLimits { } /// Removes expired rate limits from this instance. - pub fn clean_expired(&mut self) { - let now = Instant::now(); + pub fn clean_expired(&mut self, now: Instant) { self.limits .retain(|limit| !limit.retry_after.expired_at(now)); } /// Checks whether any rate limits apply to the given scoping. /// - /// If no limits match, then the returned `RateLimits` instance evalutes `is_ok`. Otherwise, it + /// If no limits match, then the returned `RateLimits` instance evaluates `is_ok`. Otherwise, it /// contains rate limits that match the given scoping. pub fn check(&self, scoping: ItemScoping<'_>) -> Self { self.check_with_quotas(&[], scoping) @@ -298,7 +298,7 @@ impl RateLimits { /// This is similar to `check`. Additionally, it checks for quotas with a static limit `0`, and /// rejects items even if there is no active rate limit in this instance. /// - /// If no limits or quotas match, then the returned `RateLimits` instance evalutes `is_ok`. + /// If no limits or quotas match, then the returned `RateLimits` instance evaluates `is_ok`. /// Otherwise, it contains rate limits that match the given scoping. pub fn check_with_quotas<'a>( &self, @@ -339,7 +339,7 @@ impl RateLimits { /// Returns `true` if there are any limits contained. /// - /// This is equavalent to checking whether [`Self::longest`] returns `Some` + /// This is equivalent to checking whether [`Self::longest`] returns `Some` /// or [`Self::iter`] returns an iterator with at least one item. 
pub fn is_empty(&self) -> bool { self.limits.is_empty() @@ -402,7 +402,7 @@ impl<'a> IntoIterator for &'a RateLimits { /// /// The data structure makes sure no expired rate limits are enforced. #[derive(Debug, Default)] -pub struct CachedRateLimits(RateLimits); +pub struct CachedRateLimits(Mutex>); impl CachedRateLimits { /// Creates a new, empty instance without any rate limits enforced. @@ -413,25 +413,31 @@ impl CachedRateLimits { /// Adds a limit to this collection. /// /// See also: [`RateLimits::add`]. - pub fn add(&mut self, limit: RateLimit) { - self.0.add(limit); + pub fn add(&self, limit: RateLimit) { + let mut inner = self.0.lock().unwrap_or_else(PoisonError::into_inner); + let current = Arc::make_mut(&mut inner); + current.add(limit); } /// Merges more rate limits into this instance. /// /// See also: [`RateLimits::merge`]. - pub fn merge(&mut self, rate_limits: RateLimits) { - for limit in rate_limits { - self.add(limit) + pub fn merge(&self, limits: RateLimits) { + let mut inner = self.0.lock().unwrap_or_else(PoisonError::into_inner); + let current = Arc::make_mut(&mut inner); + for limit in limits { + current.add(limit) } } /// Returns a reference to the contained rate limits. /// - /// This call gurantuess that at the time of call no returned rate limit is expired. - pub fn current_limits(&mut self) -> &RateLimits { - self.0.clean_expired(); - &self.0 + /// This call guarantees that at the time of call no returned rate limit is expired. + pub fn current_limits(&self) -> Arc { + let now = Instant::now(); + let mut inner = self.0.lock().unwrap_or_else(PoisonError::into_inner); + Arc::make_mut(&mut inner).clean_expired(now); + Arc::clone(&inner) } } @@ -927,7 +933,7 @@ mod tests { // Sanity check before running `clean_expired` assert_eq!(rate_limits.iter().count(), 2); - rate_limits.clean_expired(); + rate_limits.clean_expired(Instant::now()); // Check that the expired limit has been removed insta::assert_ron_snapshot!(rate_limits, @r###" @@ -1120,7 +1126,7 @@ mod tests { #[test] fn test_cached_rate_limits_expired() { - let mut cached = CachedRateLimits::new(); + let cached = CachedRateLimits::new(); // Active error limit cached.add(RateLimit { diff --git a/relay-sampling/src/evaluation.rs b/relay-sampling/src/evaluation.rs index 7f08dc8fa8..c3b3944dd7 100644 --- a/relay-sampling/src/evaluation.rs +++ b/relay-sampling/src/evaluation.rs @@ -70,7 +70,7 @@ impl<'a> ReservoirEvaluator<'a> { Arc::clone(&self.counters) } - /// Sets the Redis pool and organiation ID for the [`ReservoirEvaluator`]. + /// Sets the Redis pool and organization ID for the [`ReservoirEvaluator`]. /// /// These values are needed to synchronize with Redis. 
#[cfg(feature = "redis")] diff --git a/relay-server/Cargo.toml b/relay-server/Cargo.toml index dbec10dc95..34862cd152 100644 --- a/relay-server/Cargo.toml +++ b/relay-server/Cargo.toml @@ -29,6 +29,7 @@ processing = [ workspace = true [dependencies] +ahash = { workspace = true } anyhow = { workspace = true } serde_path_to_error = { workspace = true } axum = { workspace = true, features = ["macros", "matched-path", "tracing"] } @@ -53,6 +54,7 @@ mime = { workspace = true } minidump = { workspace = true, optional = true } multer = { workspace = true } once_cell = { workspace = true } +papaya = { workspace = true } pin-project-lite = { workspace = true } priority-queue = { workspace = true } rand = { workspace = true } diff --git a/relay-server/src/endpoints/batch_metrics.rs b/relay-server/src/endpoints/batch_metrics.rs index 4d108df90f..19d1395168 100644 --- a/relay-server/src/endpoints/batch_metrics.rs +++ b/relay-server/src/endpoints/batch_metrics.rs @@ -4,8 +4,7 @@ use serde::{Deserialize, Serialize}; use crate::extractors::{ReceivedAt, SignedBytes}; use crate::service::ServiceState; -use crate::services::processor::ProcessBatchedMetrics; -use crate::services::projects::cache::BucketSource; +use crate::services::processor::{BucketSource, ProcessBatchedMetrics}; #[derive(Debug, Serialize, Deserialize)] struct SendMetricsResponse {} diff --git a/relay-server/src/endpoints/common.rs b/relay-server/src/endpoints/common.rs index a70fec4722..7430beb6dc 100644 --- a/relay-server/src/endpoints/common.rs +++ b/relay-server/src/endpoints/common.rs @@ -12,8 +12,8 @@ use crate::envelope::{AttachmentType, Envelope, EnvelopeError, Item, ItemType, I use crate::service::ServiceState; use crate::services::buffer::EnvelopeBuffer; use crate::services::outcome::{DiscardReason, Outcome}; -use crate::services::processor::{MetricData, ProcessingGroup}; -use crate::services::projects::cache::{CheckEnvelope, ProcessMetrics, ValidateEnvelope}; +use crate::services::processor::{BucketSource, MetricData, ProcessMetrics, ProcessingGroup}; +use crate::services::projects::cache::legacy::ValidateEnvelope; use crate::statsd::{RelayCounters, RelayHistograms}; use crate::utils::{self, ApiErrorResponse, FormDataIter, ManagedEnvelope}; @@ -40,9 +40,6 @@ impl IntoResponse for ServiceUnavailable { /// Error type for all store-like requests. #[derive(Debug, thiserror::Error)] pub enum BadStoreRequest { - #[error("could not schedule event processing")] - ScheduleFailed, - #[error("empty request body")] EmptyBody, @@ -113,7 +110,7 @@ impl IntoResponse for BadStoreRequest { (StatusCode::TOO_MANY_REQUESTS, headers, body).into_response() } - BadStoreRequest::ScheduleFailed | BadStoreRequest::QueueFailed => { + BadStoreRequest::QueueFailed => { // These errors indicate that something's wrong with our service system, most likely // mailbox congestion or a faulty shutdown. Indicate an unavailable service to the // client. It might retry event submission at a later time. 
@@ -274,12 +271,12 @@ fn queue_envelope( if !metric_items.is_empty() { relay_log::trace!("sending metrics into processing queue"); - state.project_cache().send(ProcessMetrics { + state.processor().send(ProcessMetrics { data: MetricData::Raw(metric_items.into_vec()), received_at: envelope.received_at(), sent_at: envelope.sent_at(), project_key: envelope.meta().public_key(), - source: envelope.meta().into(), + source: BucketSource::from_meta(envelope.meta()), }); } } @@ -312,7 +309,9 @@ fn queue_envelope( } None => { relay_log::trace!("Sending envelope to project cache for V1 buffer"); - state.project_cache().send(ValidateEnvelope::new(envelope)); + state + .legacy_project_cache() + .send(ValidateEnvelope::new(envelope)); } } } @@ -369,10 +368,9 @@ pub async fn handle_envelope( } let checked = state - .project_cache() - .send(CheckEnvelope::new(managed_envelope)) - .await - .map_err(|_| BadStoreRequest::ScheduleFailed)? + .project_cache_handle() + .get(managed_envelope.scoping().project_key) + .check_envelope(managed_envelope) .map_err(BadStoreRequest::EventRejected)?; let Some(mut managed_envelope) = checked.envelope else { diff --git a/relay-server/src/endpoints/project_configs.rs b/relay-server/src/endpoints/project_configs.rs index 391687a31f..ff59384618 100644 --- a/relay-server/src/endpoints/project_configs.rs +++ b/relay-server/src/endpoints/project_configs.rs @@ -3,10 +3,9 @@ use std::sync::Arc; use axum::extract::{Query, Request}; use axum::handler::Handler; +use axum::http::StatusCode; use axum::response::{IntoResponse, Result}; use axum::{Json, RequestExt}; -use futures::stream::FuturesUnordered; -use futures::StreamExt; use relay_base_schema::project::ProjectKey; use relay_dynamic_config::{ErrorBoundary, GlobalConfig}; use serde::{Deserialize, Serialize}; @@ -16,15 +15,10 @@ use crate::endpoints::forward; use crate::extractors::SignedJson; use crate::service::ServiceState; use crate::services::global_config::{self, StatusResponse}; -use crate::services::projects::cache::{GetCachedProjectState, GetProjectState}; use crate::services::projects::project::{ - LimitedParsedProjectState, ParsedProjectState, ProjectState, + LimitedParsedProjectState, ParsedProjectState, ProjectState, Revision, }; - -/// V2 version of this endpoint. -/// -/// The request is a list of [`ProjectKey`]s and the response a list of [`ProjectStateWrapper`]s -const ENDPOINT_V2: u16 = 2; +use crate::utils::ApiErrorResponse; /// V3 version of this endpoint. /// @@ -34,6 +28,16 @@ const ENDPOINT_V2: u16 = 2; /// returned, or a further poll ensues. const ENDPOINT_V3: u16 = 3; +#[derive(Debug, Clone, Copy, thiserror::Error)] +#[error("This API version is no longer supported, upgrade your Relay or Client")] +struct VersionOutdatedError; + +impl IntoResponse for VersionOutdatedError { + fn into_response(self) -> axum::response::Response { + (StatusCode::BAD_REQUEST, ApiErrorResponse::from_error(&self)).into_response() + } +} + /// Helper to deserialize the `version` query parameter. #[derive(Clone, Copy, Debug, Deserialize)] struct VersionQuery { @@ -102,19 +106,17 @@ struct GetProjectStatesRequest { /// /// This length of this list if specified must be the same length /// as [`Self::public_keys`], the items are asssociated by index. 
- revisions: Option>>>, + revisions: Option>>, #[serde(default)] full_config: bool, #[serde(default)] - no_cache: bool, - #[serde(default)] global: bool, } fn into_valid_keys( public_keys: Vec>, - revisions: Option>>>, -) -> impl Iterator)> { + revisions: Option>>, +) -> impl Iterator { let mut revisions = revisions.and_then(|e| e.ok()).unwrap_or_default(); if !revisions.is_empty() && revisions.len() != public_keys.len() { // The downstream sent us a different amount of revisions than project keys, @@ -127,7 +129,9 @@ fn into_valid_keys( ); revisions.clear(); } - let revisions = revisions.into_iter().chain(std::iter::repeat(None)); + let revisions = revisions + .into_iter() + .chain(std::iter::repeat_with(Revision::default)); std::iter::zip(public_keys, revisions).filter_map(|(public_key, revision)| { // Skip unparsable public keys. @@ -139,30 +143,9 @@ fn into_valid_keys( async fn inner( state: ServiceState, - Query(version): Query, body: SignedJson, ) -> Result { let SignedJson { inner, relay } = body; - let project_cache = &state.project_cache().clone(); - - let no_cache = inner.no_cache; - let keys_len = inner.public_keys.len(); - - let mut futures: FuturesUnordered<_> = into_valid_keys(inner.public_keys, inner.revisions) - .map(|(project_key, revision)| async move { - let state_result = if version.version >= ENDPOINT_V3 && !no_cache { - project_cache - .send(GetCachedProjectState::new(project_key)) - .await - } else { - project_cache - .send(GetProjectState::new(project_key).no_cache(no_cache)) - .await - }; - - (project_key, revision, state_result) - }) - .collect(); let (global, global_status) = if inner.global { match state.global_config().send(global_config::Get).await? { @@ -178,12 +161,15 @@ async fn inner( (None, None) }; + let keys_len = inner.public_keys.len(); let mut pending = Vec::with_capacity(keys_len); let mut unchanged = Vec::with_capacity(keys_len); let mut configs = HashMap::with_capacity(keys_len); - while let Some((project_key, revision, state_result)) = futures.next().await { - let project_info = match state_result? { + for (project_key, revision) in into_valid_keys(inner.public_keys, inner.revisions) { + let project = state.project_cache_handle().get(project_key); + + let project_info = match project.state() { ProjectState::Enabled(info) => info, ProjectState::Disabled => { // Don't insert project config. Downstream Relay will consider it disabled. @@ -196,7 +182,7 @@ async fn inner( }; // Only ever omit responses when there was a valid revision in the first place. - if revision.is_some() && project_info.rev == revision { + if project_info.rev == revision { unchanged.push(project_key); continue; } @@ -237,9 +223,14 @@ async fn inner( })) } +/// Returns `true` for all `?version` query parameters that are no longer supported by Relay and Sentry. +fn is_outdated(Query(query): Query) -> bool { + query.version < ENDPOINT_V3 +} + /// Returns `true` if the `?version` query parameter is compatible with this implementation. fn is_compatible(Query(query): Query) -> bool { - query.version >= ENDPOINT_V2 && query.version <= ENDPOINT_V3 + query.version == ENDPOINT_V3 } /// Endpoint handler for the project configs endpoint. @@ -255,9 +246,11 @@ fn is_compatible(Query(query): Query) -> bool { /// support old Relay versions. 
pub async fn handle(state: ServiceState, mut req: Request) -> Result { let data = req.extract_parts().await?; - Ok(if is_compatible(data) { - inner.call(req, state).await + if is_outdated(data) { + Err(VersionOutdatedError.into()) + } else if is_compatible(data) { + Ok(inner.call(req, state).await) } else { - forward::forward(state, req).await - }) + Ok(forward::forward(state, req).await) + } } diff --git a/relay-server/src/service.rs b/relay-server/src/service.rs index f4c8051284..ac953b13ff 100644 --- a/relay-server/src/service.rs +++ b/relay-server/src/service.rs @@ -1,5 +1,4 @@ use std::convert::Infallible; -use std::fmt; use std::sync::Arc; use std::time::Duration; @@ -8,11 +7,12 @@ use crate::services::buffer::{self, EnvelopeBufferService, ObservableEnvelopeBuf use crate::services::cogs::{CogsService, CogsServiceRecorder}; use crate::services::global_config::{GlobalConfigManager, GlobalConfigService}; use crate::services::health_check::{HealthCheck, HealthCheckService}; -use crate::services::metrics::{Aggregator, RouterService}; +use crate::services::metrics::RouterService; use crate::services::outcome::{OutcomeProducer, OutcomeProducerService, TrackOutcome}; use crate::services::outcome_aggregator::OutcomeAggregator; use crate::services::processor::{self, EnvelopeProcessor, EnvelopeProcessorService}; -use crate::services::projects::cache::{ProjectCache, ProjectCacheService, Services}; +use crate::services::projects::cache::{legacy, ProjectCacheHandle, ProjectCacheService}; +use crate::services::projects::source::ProjectSource; use crate::services::relays::{RelayCache, RelayCacheService}; use crate::services::stats::RelayStats; #[cfg(feature = "processing")] @@ -52,9 +52,8 @@ pub enum ServiceError { Redis, } -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct Registry { - pub aggregator: Addr, pub health_check: Addr, pub outcome_producer: Addr, pub outcome_aggregator: Addr, @@ -62,21 +61,11 @@ pub struct Registry { pub test_store: Addr, pub relay_cache: Addr, pub global_config: Addr, - pub project_cache: Addr, + pub legacy_project_cache: Addr, pub upstream_relay: Addr, pub envelope_buffer: Option, -} -impl fmt::Debug for Registry { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("Registry") - .field("aggregator", &self.aggregator) - .field("health_check", &self.health_check) - .field("outcome_producer", &self.outcome_producer) - .field("outcome_aggregator", &self.outcome_aggregator) - .field("processor", &format_args!("Addr")) - .finish() - } + pub project_cache_handle: ProjectCacheHandle, } /// Constructs a tokio [`Runtime`] configured for running [services](relay_system::Service). @@ -144,6 +133,7 @@ fn create_store_pool(config: &Config) -> Result { struct StateInner { config: Arc, memory_checker: MemoryChecker, + registry: Registry, } @@ -198,12 +188,23 @@ impl ServiceState { // service fail if the service is not running. 
let global_config = global_config.start(); - let (project_cache, project_cache_rx) = channel(ProjectCacheService::name()); + let (legacy_project_cache, legacy_project_cache_rx) = + channel(legacy::ProjectCacheService::name()); + + let project_source = ProjectSource::start( + Arc::clone(&config), + upstream_relay.clone(), + redis_pools + .as_ref() + .map(|pools| pools.project_configs.clone()), + ); + let project_cache_handle = + ProjectCacheService::new(Arc::clone(&config), project_source).start(); let aggregator = RouterService::new( config.default_aggregator_config().clone(), config.secondary_aggregator_configs().clone(), - Some(project_cache.clone().recipient()), + Some(legacy_project_cache.clone().recipient()), ); let aggregator_handle = aggregator.handle(); let aggregator = aggregator.start(); @@ -238,11 +239,11 @@ impl ServiceState { create_processor_pool(&config)?, config.clone(), global_config_handle, + project_cache_handle.clone(), cogs, #[cfg(feature = "processing")] redis_pools.clone(), processor::Addrs { - project_cache: project_cache.clone(), outcome_aggregator: outcome_aggregator.clone(), upstream_relay: upstream_relay.clone(), test_store: test_store.clone(), @@ -261,7 +262,7 @@ impl ServiceState { global_config_rx.clone(), buffer::Services { envelopes_tx, - project_cache: project_cache.clone(), + project_cache_handle: project_cache_handle.clone(), outcome_aggregator: outcome_aggregator.clone(), test_store: test_store.clone(), }, @@ -269,27 +270,24 @@ impl ServiceState { .map(|b| b.start_observable()); // Keep all the services in one context. - let project_cache_services = Services { + let project_cache_services = legacy::Services { envelope_buffer: envelope_buffer.as_ref().map(ObservableEnvelopeBuffer::addr), aggregator: aggregator.clone(), envelope_processor: processor.clone(), outcome_aggregator: outcome_aggregator.clone(), - project_cache: project_cache.clone(), + project_cache: legacy_project_cache.clone(), test_store: test_store.clone(), - upstream_relay: upstream_relay.clone(), }; - ProjectCacheService::new( + legacy::ProjectCacheService::new( config.clone(), MemoryChecker::new(memory_stat.clone(), config.clone()), + project_cache_handle.clone(), project_cache_services, global_config_rx, envelopes_rx, - redis_pools - .as_ref() - .map(|pools| pools.project_configs.clone()), ) - .spawn_handler(project_cache_rx); + .spawn_handler(legacy_project_cache_rx); let health_check = HealthCheckService::new( config.clone(), @@ -311,7 +309,6 @@ impl ServiceState { let relay_cache = RelayCacheService::new(config.clone(), upstream_relay.clone()).start(); let registry = Registry { - aggregator, processor, health_check, outcome_producer, @@ -319,7 +316,8 @@ impl ServiceState { test_store, relay_cache, global_config, - project_cache, + legacy_project_cache, + project_cache_handle, upstream_relay, envelope_buffer, }; @@ -352,9 +350,14 @@ impl ServiceState { self.inner.registry.envelope_buffer.as_ref() } - /// Returns the address of the [`ProjectCache`] service. - pub fn project_cache(&self) -> &Addr { - &self.inner.registry.project_cache + /// Returns the address of the [`legacy::ProjectCache`] service. + pub fn legacy_project_cache(&self) -> &Addr { + &self.inner.registry.legacy_project_cache + } + + /// Returns a [`ProjectCacheHandle`]. + pub fn project_cache_handle(&self) -> &ProjectCacheHandle { + &self.inner.registry.project_cache_handle } /// Returns the address of the [`RelayCache`] service. 
diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index eeb561f16f..ff8d1cfd36 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -21,8 +21,7 @@ use crate::services::outcome::DiscardReason; use crate::services::outcome::Outcome; use crate::services::outcome::TrackOutcome; use crate::services::processor::ProcessingGroup; -use crate::services::projects::cache::{DequeuedEnvelope, ProjectCache, UpdateProject}; - +use crate::services::projects::cache::{legacy, ProjectCacheHandle, ProjectChange}; use crate::services::test_store::TestStore; use crate::statsd::{RelayCounters, RelayHistograms}; use crate::utils::ManagedEnvelope; @@ -99,8 +98,8 @@ impl ObservableEnvelopeBuffer { pub struct Services { /// Bounded channel used exclusively to handle backpressure when sending envelopes to the /// project cache. - pub envelopes_tx: mpsc::Sender, - pub project_cache: Addr, + pub envelopes_tx: mpsc::Sender, + pub project_cache_handle: ProjectCacheHandle, pub outcome_aggregator: Addr, pub test_store: Addr, } @@ -157,7 +156,7 @@ impl EnvelopeBufferService { &mut self, buffer: &PolymorphicEnvelopeBuffer, dequeue: bool, - ) -> Option> { + ) -> Option> { relay_statsd::metric!( counter(RelayCounters::BufferReadyToPop) += 1, status = "checking" @@ -218,7 +217,7 @@ impl EnvelopeBufferService { config: &Config, buffer: &mut PolymorphicEnvelopeBuffer, services: &Services, - envelopes_tx_permit: Permit<'a, DequeuedEnvelope>, + envelopes_tx_permit: Permit<'a, legacy::DequeuedEnvelope>, ) -> Result { let sleep = match buffer.peek().await? { Peek::Empty => { @@ -251,7 +250,7 @@ impl EnvelopeBufferService { .pop() .await? .expect("Element disappeared despite exclusive excess"); - envelopes_tx_permit.send(DequeuedEnvelope(envelope)); + envelopes_tx_permit.send(legacy::DequeuedEnvelope(envelope)); Duration::ZERO // try next pop immediately } @@ -269,12 +268,12 @@ impl EnvelopeBufferService { relay_log::trace!("EnvelopeBufferService: requesting project(s) update"); let own_key = envelope.meta().public_key(); - services.project_cache.send(UpdateProject(own_key)); + services.project_cache_handle.fetch(own_key); match envelope.sampling_key() { None => {} Some(sampling_key) if sampling_key == own_key => {} // already sent. 
Some(sampling_key) => { - services.project_cache.send(UpdateProject(sampling_key)); + services.project_cache_handle.fetch(sampling_key); } } @@ -398,6 +397,7 @@ impl Service for EnvelopeBufferService { buffer.initialize().await; let mut shutdown = Controller::shutdown_handle(); + let mut project_events = self.services.project_cache_handle.changes(); relay_log::info!("EnvelopeBufferService: starting"); loop { @@ -427,6 +427,10 @@ impl Service for EnvelopeBufferService { } } } + Ok(ProjectChange::Ready(project_key)) = project_events.recv() => { + Self::handle_message(&mut buffer, EnvelopeBuffer::Ready(project_key)).await; + sleep = Duration::ZERO; + } Some(message) = rx.recv() => { Self::handle_message(&mut buffer, message).await; sleep = Duration::ZERO; @@ -483,8 +487,8 @@ mod tests { struct EnvelopeBufferServiceResult { service: EnvelopeBufferService, global_tx: watch::Sender, - envelopes_rx: mpsc::Receiver, - project_cache_rx: mpsc::UnboundedReceiver, + envelopes_rx: mpsc::Receiver, + project_cache_handle: ProjectCacheHandle, outcome_aggregator_rx: mpsc::UnboundedReceiver, } @@ -492,6 +496,8 @@ mod tests { config_json: Option, global_config_status: global_config::Status, ) -> EnvelopeBufferServiceResult { + relay_log::init_test!(); + let config_json = config_json.unwrap_or(serde_json::json!({ "spool": { "envelopes": { @@ -504,8 +510,8 @@ mod tests { let memory_stat = MemoryStat::default(); let (global_tx, global_rx) = watch::channel(global_config_status); let (envelopes_tx, envelopes_rx) = mpsc::channel(5); - let (project_cache, project_cache_rx) = Addr::custom(); let (outcome_aggregator, outcome_aggregator_rx) = Addr::custom(); + let project_cache_handle = ProjectCacheHandle::for_test(); let envelope_buffer_service = EnvelopeBufferService::new( config, @@ -513,7 +519,7 @@ mod tests { global_rx, Services { envelopes_tx, - project_cache, + project_cache_handle: project_cache_handle.clone(), outcome_aggregator, test_store: Addr::dummy(), }, @@ -524,7 +530,7 @@ mod tests { service: envelope_buffer_service, global_tx, envelopes_rx, - project_cache_rx, + project_cache_handle, outcome_aggregator_rx, } } @@ -537,7 +543,7 @@ mod tests { service, global_tx: _global_tx, envelopes_rx: _envelopes_rx, - project_cache_rx: _project_cache_rx, + project_cache_handle: _project_cache_handle, outcome_aggregator_rx: _outcome_aggregator_rx, } = envelope_buffer_service(None, global_config::Status::Pending); @@ -562,8 +568,8 @@ mod tests { service, global_tx, envelopes_rx, - project_cache_rx, outcome_aggregator_rx: _outcome_aggregator_rx, + .. } = envelope_buffer_service(None, global_config::Status::Pending); let addr = service.start(); @@ -576,7 +582,6 @@ mod tests { tokio::time::sleep(Duration::from_millis(1000)).await; assert_eq!(envelopes_rx.len(), 0); - assert_eq!(project_cache_rx.len(), 0); global_tx.send_replace(global_config::Status::Ready(Arc::new( GlobalConfig::default(), @@ -585,7 +590,6 @@ mod tests { tokio::time::sleep(Duration::from_millis(1000)).await; assert_eq!(envelopes_rx.len(), 1); - assert_eq!(project_cache_rx.len(), 0); } #[tokio::test] @@ -595,9 +599,9 @@ mod tests { let EnvelopeBufferServiceResult { service, envelopes_rx, - project_cache_rx, outcome_aggregator_rx: _outcome_aggregator_rx, global_tx: _global_tx, + .. 
} = envelope_buffer_service( Some(serde_json::json!({ "spool": { @@ -623,7 +627,6 @@ mod tests { tokio::time::sleep(Duration::from_millis(1000)).await; assert_eq!(envelopes_rx.len(), 0); - assert_eq!(project_cache_rx.len(), 0); } #[tokio::test] @@ -633,7 +636,7 @@ mod tests { let EnvelopeBufferServiceResult { service, envelopes_rx, - project_cache_rx, + project_cache_handle: _project_cache_handle, mut outcome_aggregator_rx, global_tx: _global_tx, } = envelope_buffer_service( @@ -661,7 +664,6 @@ mod tests { tokio::time::sleep(Duration::from_millis(100)).await; assert_eq!(envelopes_rx.len(), 0); - assert_eq!(project_cache_rx.len(), 0); let outcome = outcome_aggregator_rx.try_recv().unwrap(); assert_eq!(outcome.category, DataCategory::TransactionIndexed); @@ -675,7 +677,7 @@ mod tests { let EnvelopeBufferServiceResult { service, mut envelopes_rx, - mut project_cache_rx, + project_cache_handle, global_tx: _global_tx, outcome_aggregator_rx: _outcome_aggregator_rx, } = envelope_buffer_service( @@ -689,10 +691,10 @@ mod tests { let project_key = envelope.meta().public_key(); addr.send(EnvelopeBuffer::Push(envelope.clone())); + tokio::time::sleep(Duration::from_secs(3)).await; - tokio::time::sleep(Duration::from_secs(1)).await; - - let Some(DequeuedEnvelope(envelope)) = envelopes_rx.recv().await else { + let message = tokio::time::timeout(Duration::from_secs(5), envelopes_rx.recv()); + let Some(legacy::DequeuedEnvelope(envelope)) = message.await.unwrap() else { panic!(); }; @@ -700,20 +702,11 @@ mod tests { tokio::time::sleep(Duration::from_millis(100)).await; - assert_eq!(project_cache_rx.len(), 1); - let message = project_cache_rx.recv().await; - assert!(matches!( - message, - Some(ProjectCache::UpdateProject(key)) if key == project_key - )); + assert_eq!(project_cache_handle.test_num_fetches(), 1); - tokio::time::sleep(Duration::from_secs(1)).await; + tokio::time::sleep(Duration::from_millis(1300)).await; - assert_eq!(project_cache_rx.len(), 1); - assert!(matches!( - message, - Some(ProjectCache::UpdateProject(key)) if key == project_key - )) + assert_eq!(project_cache_handle.test_num_fetches(), 2); } #[tokio::test] @@ -724,8 +717,8 @@ mod tests { service, mut envelopes_rx, global_tx: _global_tx, - project_cache_rx: _project_cache_rx, outcome_aggregator_rx: _outcome_aggregator_rx, + .. 
} = envelope_buffer_service( None, global_config::Status::Ready(Arc::new(GlobalConfig::default())), @@ -748,7 +741,7 @@ mod tests { assert_eq!( messages .iter() - .filter(|message| matches!(message, DequeuedEnvelope(..))) + .filter(|message| matches!(message, legacy::DequeuedEnvelope(..))) .count(), 5 ); @@ -761,7 +754,7 @@ mod tests { assert_eq!( messages .iter() - .filter(|message| matches!(message, DequeuedEnvelope(..))) + .filter(|message| matches!(message, legacy::DequeuedEnvelope(..))) .count(), 5 ); diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs index c528ac1643..d5f5ef58d9 100644 --- a/relay-server/src/services/processor.rs +++ b/relay-server/src/services/processor.rs @@ -72,9 +72,7 @@ use crate::services::global_config::GlobalConfigHandle; use crate::services::metrics::{Aggregator, MergeBuckets}; use crate::services::outcome::{DiscardReason, Outcome, TrackOutcome}; use crate::services::processor::event::FiltersStatus; -use crate::services::projects::cache::{ - BucketSource, ProcessMetrics, ProjectCache, UpdateRateLimits, -}; +use crate::services::projects::cache::ProjectCacheHandle; use crate::services::projects::project::{ProjectInfo, ProjectState}; use crate::services::test_store::{Capture, TestStore}; use crate::services::upstream::{ @@ -767,7 +765,7 @@ struct ProcessEnvelopeState<'a, Group> { /// Currently active cached rate limits of the project this envelope belongs to. #[cfg_attr(not(feature = "processing"), expect(dead_code))] - rate_limits: RateLimits, + rate_limits: Arc, /// The config of this Relay instance. config: Arc, @@ -884,7 +882,7 @@ pub struct ProcessEnvelope { /// The project info. pub project_info: Arc, /// Currently active cached rate limits for this project. - pub rate_limits: RateLimits, + pub rate_limits: Arc, /// Root sampling project info. pub sampling_project_info: Option>, /// Sampling reservoir counters. @@ -903,16 +901,7 @@ pub struct ProcessEnvelope { /// Additionally, processing applies clock drift correction using the system clock of this Relay, if /// the Envelope specifies the [`sent_at`](Envelope::sent_at) header. #[derive(Debug)] -pub struct ProcessProjectMetrics { - /// The project state the metrics belong to. - /// - /// The project state can be pending, in which case cached rate limits - /// and other project specific operations are skipped and executed once - /// the project state becomes available. - pub project_state: ProjectState, - /// Currently active cached rate limits for this project. - pub rate_limits: RateLimits, - +pub struct ProcessMetrics { /// A list of metric items. pub data: MetricData, /// The target project. @@ -1000,6 +989,32 @@ pub struct ProcessBatchedMetrics { pub sent_at: Option>, } +/// Source information where a metric bucket originates from. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum BucketSource { + /// The metric bucket originated from an internal Relay use case. + /// + /// The metric bucket originates either from within the same Relay + /// or was accepted coming from another Relay which is registered as + /// an internal Relay via Relay's configuration. + Internal, + /// The bucket source originated from an untrusted source. + /// + /// Managed Relays sending extracted metrics are considered external, + /// it's a project use case but it comes from an untrusted source. + External, +} + +impl BucketSource { + /// Infers the bucket source from [`RequestMeta::is_from_internal_relay`]. 
+ pub fn from_meta(meta: &RequestMeta) -> Self { + match meta.is_from_internal_relay() { + true => Self::Internal, + false => Self::External, + } + } +} + /// Metric buckets with additional project. #[derive(Debug, Clone)] pub struct ProjectMetrics { @@ -1008,7 +1023,7 @@ pub struct ProjectMetrics { /// Project info for extracting quotas. pub project_info: Arc, /// Currently cached rate limits. - pub rate_limits: RateLimits, + pub rate_limits: Arc, } /// Encodes metrics into an envelope ready to be sent upstream. @@ -1037,7 +1052,7 @@ pub struct SubmitClientReports { #[derive(Debug)] pub enum EnvelopeProcessor { ProcessEnvelope(Box), - ProcessProjectMetrics(Box), + ProcessProjectMetrics(Box), ProcessBatchedMetrics(Box), EncodeMetrics(Box), SubmitEnvelope(Box), @@ -1068,10 +1083,10 @@ impl FromMessage for EnvelopeProcessor { } } -impl FromMessage for EnvelopeProcessor { +impl FromMessage for EnvelopeProcessor { type Response = NoResponse; - fn from_message(message: ProcessProjectMetrics, _: ()) -> Self { + fn from_message(message: ProcessMetrics, _: ()) -> Self { Self::ProcessProjectMetrics(Box::new(message)) } } @@ -1118,7 +1133,6 @@ pub struct EnvelopeProcessorService { /// Contains the addresses of services that the processor publishes to. pub struct Addrs { - pub project_cache: Addr, pub outcome_aggregator: Addr, pub upstream_relay: Addr, pub test_store: Addr, @@ -1130,7 +1144,6 @@ pub struct Addrs { impl Default for Addrs { fn default() -> Self { Addrs { - project_cache: Addr::dummy(), outcome_aggregator: Addr::dummy(), upstream_relay: Addr::dummy(), test_store: Addr::dummy(), @@ -1145,6 +1158,7 @@ struct InnerProcessor { workers: WorkerGroup, config: Arc, global_config: GlobalConfigHandle, + project_cache: ProjectCacheHandle, cogs: Cogs, #[cfg(feature = "processing")] quotas_pool: Option, @@ -1159,10 +1173,12 @@ struct InnerProcessor { impl EnvelopeProcessorService { /// Creates a multi-threaded envelope processor. + #[cfg_attr(feature = "processing", expect(clippy::too_many_arguments))] pub fn new( pool: ThreadPool, config: Arc, global_config: GlobalConfigHandle, + project_cache: ProjectCacheHandle, cogs: Cogs, #[cfg(feature = "processing")] redis: Option, addrs: Addrs, @@ -1191,6 +1207,7 @@ impl EnvelopeProcessorService { let inner = InnerProcessor { workers: WorkerGroup::new(pool), global_config, + project_cache, cogs, #[cfg(feature = "processing")] quotas_pool: quotas.clone(), @@ -1256,7 +1273,7 @@ impl EnvelopeProcessorService { mut managed_envelope: TypedEnvelope, project_id: ProjectId, project_info: Arc, - rate_limits: RateLimits, + rate_limits: Arc, sampling_project_info: Option>, reservoir_counters: Arc>>, ) -> ProcessEnvelopeState { @@ -1327,10 +1344,11 @@ impl EnvelopeProcessorService { // Update cached rate limits with the freshly computed ones. 
if !limits.is_empty() { - self.inner.addrs.project_cache.send(UpdateRateLimits::new( - state.managed_envelope.scoping().project_key, - limits, - )); + self.inner + .project_cache + .get(state.managed_envelope.scoping().project_key) + .rate_limits() + .merge(limits); } Ok(()) @@ -1866,7 +1884,7 @@ impl EnvelopeProcessorService { mut managed_envelope: ManagedEnvelope, project_id: ProjectId, project_info: Arc, - rate_limits: RateLimits, + rate_limits: Arc, sampling_project_info: Option>, reservoir_counters: Arc>>, ) -> Result { @@ -2084,10 +2102,8 @@ impl EnvelopeProcessorService { } } - fn handle_process_project_metrics(&self, cogs: &mut Token, message: ProcessProjectMetrics) { - let ProcessProjectMetrics { - project_state, - rate_limits, + fn handle_process_metrics(&self, cogs: &mut Token, message: ProcessMetrics) { + let ProcessMetrics { data, project_key, received_at, @@ -2126,13 +2142,16 @@ impl EnvelopeProcessorService { true }); + let project = self.inner.project_cache.get(project_key); + // Best effort check to filter and rate limit buckets, if there is no project state // available at the current time, we will check again after flushing. - let buckets = match project_state.enabled() { - Some(project_info) => { - self.check_buckets(project_key, &project_info, &rate_limits, buckets) + let buckets = match project.state() { + ProjectState::Enabled(project_info) => { + let rate_limits = project.rate_limits().current_limits(); + self.check_buckets(project_key, project_info, &rate_limits, buckets) } - None => buckets, + _ => buckets, }; relay_log::trace!("merging metric buckets into the aggregator"); @@ -2167,21 +2186,17 @@ impl EnvelopeProcessorService { } }; - let mut feature_weights = FeatureWeights::none(); for (project_key, buckets) in buckets { - feature_weights = feature_weights.merge(relay_metrics::cogs::BySize(&buckets).into()); - - self.inner.addrs.project_cache.send(ProcessMetrics { - data: MetricData::Parsed(buckets), - project_key, - source, - received_at, - sent_at, - }); - } - - if !feature_weights.is_empty() { - cogs.update(feature_weights); + self.handle_process_metrics( + cogs, + ProcessMetrics { + data: MetricData::Parsed(buckets), + project_key, + source, + received_at, + sent_at, + }, + ) } } @@ -2229,7 +2244,7 @@ impl EnvelopeProcessorService { envelope, body, http_encoding, - project_cache: self.inner.addrs.project_cache.clone(), + project_cache: self.inner.project_cache.clone(), })); } Err(error) => { @@ -2382,10 +2397,11 @@ impl EnvelopeProcessorService { Outcome::RateLimited(reason_code), ); - self.inner.addrs.project_cache.send(UpdateRateLimits::new( - item_scoping.scoping.project_key, - limits, - )); + self.inner + .project_cache + .get(item_scoping.scoping.project_key) + .rate_limits() + .merge(limits); } } @@ -2460,9 +2476,10 @@ impl EnvelopeProcessorService { if was_enforced { // Update the rate limits in the project cache. 
self.inner - .addrs .project_cache - .send(UpdateRateLimits::new(scoping.project_key, rate_limits)); + .get(scoping.project_key) + .rate_limits() + .merge(rate_limits); } } } @@ -2773,7 +2790,7 @@ impl EnvelopeProcessorService { match message { EnvelopeProcessor::ProcessEnvelope(m) => self.handle_process_envelope(*m), EnvelopeProcessor::ProcessProjectMetrics(m) => { - self.handle_process_project_metrics(&mut cogs, *m) + self.handle_process_metrics(&mut cogs, *m) } EnvelopeProcessor::ProcessBatchedMetrics(m) => { self.handle_process_batched_metrics(&mut cogs, *m) @@ -2919,7 +2936,7 @@ pub struct SendEnvelope { envelope: TypedEnvelope, body: Bytes, http_encoding: HttpEncoding, - project_cache: Addr, + project_cache: ProjectCacheHandle, } impl UpstreamRequest for SendEnvelope { @@ -2971,10 +2988,10 @@ impl UpstreamRequest for SendEnvelope { self.envelope.accept(); if let UpstreamRequestError::RateLimited(limits) = error { - self.project_cache.send(UpdateRateLimits::new( - scoping.project_key, - limits.scope(&scoping), - )); + self.project_cache + .get(scoping.project_key) + .rate_limits() + .merge(limits.scope(&scoping)); } } Err(error) => { @@ -3714,16 +3731,14 @@ mod tests { ), (BucketSource::Internal, None), ] { - let message = ProcessProjectMetrics { + let message = ProcessMetrics { data: MetricData::Raw(vec![item.clone()]), - project_state: ProjectState::Pending, - rate_limits: Default::default(), project_key, source, received_at, sent_at: Some(Utc::now()), }; - processor.handle_process_project_metrics(&mut token, message); + processor.handle_process_metrics(&mut token, message); let value = aggregator_rx.recv().await.unwrap(); let Aggregator::MergeBuckets(merge_buckets) = value else { @@ -3741,11 +3756,11 @@ mod tests { let received_at = Utc::now(); let config = Config::default(); - let (project_cache, mut project_cache_rx) = Addr::custom(); + let (aggregator, mut aggregator_rx) = Addr::custom(); let processor = create_test_processor_with_addrs( config, Addrs { - project_cache, + aggregator, ..Default::default() }, ); @@ -3788,78 +3803,72 @@ mod tests { }; processor.handle_process_batched_metrics(&mut token, message); - let value = project_cache_rx.recv().await.unwrap(); - let ProjectCache::ProcessMetrics(pm1) = value else { + let value = aggregator_rx.recv().await.unwrap(); + let Aggregator::MergeBuckets(mb1) = value else { panic!() }; - let value = project_cache_rx.recv().await.unwrap(); - let ProjectCache::ProcessMetrics(pm2) = value else { + let value = aggregator_rx.recv().await.unwrap(); + let Aggregator::MergeBuckets(mb2) = value else { panic!() }; - let mut messages = vec![pm1, pm2]; + let mut messages = vec![mb1, mb2]; messages.sort_by_key(|pm| pm.project_key); let actual = messages .into_iter() - .map(|pm| (pm.project_key, pm.data, pm.source)) + .map(|pm| (pm.project_key, pm.buckets)) .collect::>(); assert_debug_snapshot!(actual, @r###" [ ( ProjectKey("11111111111111111111111111111111"), - Parsed( - [ - Bucket { - timestamp: UnixTimestamp(1615889440), - width: 0, - name: MetricName( - "d:custom/endpoint.response_time@millisecond", - ), - value: Distribution( - [ - 68.0, - ], - ), - tags: { - "route": "user_index", - }, - metadata: BucketMetadata { - merges: 1, - received_at: None, - extracted_from_indexed: false, - }, + [ + Bucket { + timestamp: UnixTimestamp(1615889440), + width: 0, + name: MetricName( + "d:custom/endpoint.response_time@millisecond", + ), + value: Distribution( + [ + 68.0, + ], + ), + tags: { + "route": "user_index", }, - ], - ), - Internal, + metadata: 
BucketMetadata { + merges: 1, + received_at: None, + extracted_from_indexed: false, + }, + }, + ], ), ( ProjectKey("22222222222222222222222222222222"), - Parsed( - [ - Bucket { - timestamp: UnixTimestamp(1615889440), - width: 0, - name: MetricName( - "d:custom/endpoint.cache_rate@none", - ), - value: Distribution( - [ - 36.0, - ], - ), - tags: {}, - metadata: BucketMetadata { - merges: 1, - received_at: None, - extracted_from_indexed: false, - }, + [ + Bucket { + timestamp: UnixTimestamp(1615889440), + width: 0, + name: MetricName( + "d:custom/endpoint.cache_rate@none", + ), + value: Distribution( + [ + 36.0, + ], + ), + tags: {}, + metadata: BucketMetadata { + merges: 1, + received_at: None, + extracted_from_indexed: false, }, - ], - ), - Internal, + }, + ], ), ] "###); diff --git a/relay-server/src/services/processor/metrics.rs b/relay-server/src/services/processor/metrics.rs index 0d28285e77..703898e425 100644 --- a/relay-server/src/services/processor/metrics.rs +++ b/relay-server/src/services/processor/metrics.rs @@ -5,7 +5,7 @@ use relay_quotas::Scoping; use crate::metrics::MetricOutcomes; use crate::services::outcome::Outcome; -use crate::services::projects::cache::BucketSource; +use crate::services::processor::BucketSource; use crate::services::projects::project::ProjectInfo; /// Checks if the namespace of the passed bucket is valid. diff --git a/relay-server/src/services/processor/span/processing.rs b/relay-server/src/services/processor/span/processing.rs index 9ef4945a6e..a6452515b2 100644 --- a/relay-server/src/services/processor/span/processing.rs +++ b/relay-server/src/services/processor/span/processing.rs @@ -848,7 +848,7 @@ mod tests { extracted_metrics: ProcessingExtractedMetrics::new(), config: Arc::new(Config::default()), project_info, - rate_limits: RateLimits::default(), + rate_limits: Arc::new(RateLimits::default()), sampling_project_info: None, project_id: ProjectId::new(42), managed_envelope: managed_envelope.try_into().unwrap(), diff --git a/relay-server/src/services/projects/cache/handle.rs b/relay-server/src/services/projects/cache/handle.rs new file mode 100644 index 0000000000..415bad5b75 --- /dev/null +++ b/relay-server/src/services/projects/cache/handle.rs @@ -0,0 +1,104 @@ +use std::fmt; +use std::sync::Arc; + +use relay_base_schema::project::ProjectKey; +use relay_config::Config; +use relay_system::Addr; +use tokio::sync::broadcast; + +use super::state::Shared; +use crate::services::projects::cache::service::ProjectChange; +use crate::services::projects::cache::{Project, ProjectCache}; + +/// A synchronous handle to the [`ProjectCache`]. +/// +/// The handle allows lock free access to cached projects. It also acts as an interface +/// to the [`ProjectCacheService`](super::ProjectCacheService). +#[derive(Clone)] +pub struct ProjectCacheHandle { + pub(super) shared: Arc, + pub(super) config: Arc, + pub(super) service: Addr, + pub(super) project_changes: broadcast::Sender, +} + +impl ProjectCacheHandle { + /// Returns the current project state for the `project_key`. + pub fn get(&self, project_key: ProjectKey) -> Project<'_> { + let project = self.shared.get_or_create(project_key); + // Always trigger a fetch after retrieving the project to make sure the state is up to date. + self.fetch(project_key); + + Project::new(project, &self.config) + } + + /// Triggers a fetch/update check in the project cache for the supplied project. 
+ pub fn fetch(&self, project_key: ProjectKey) { + self.service.send(ProjectCache::Fetch(project_key)); + } + + /// Returns a subscription to all [`ProjectChange`]'s. + /// + /// This stream notifies the subscriber about project state changes in the project cache. + /// Events may arrive in arbitrary order and be delivered multiple times. + pub fn changes(&self) -> broadcast::Receiver { + self.project_changes.subscribe() + } +} + +impl fmt::Debug for ProjectCacheHandle { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("ProjectCacheHandle") + .field("shared", &self.shared) + .finish() + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::services::projects::project::ProjectState; + + impl ProjectCacheHandle { + /// Creates a new [`ProjectCacheHandle`] for testing only. + /// + /// A project cache handle created this way does not require a service to function. + pub fn for_test() -> Self { + Self { + shared: Default::default(), + config: Default::default(), + service: Addr::dummy(), + project_changes: broadcast::channel(999_999).0, + } + } + + /// Sets the project state for a project. + /// + /// This can be used to emulate a project cache update in tests. + pub fn test_set_project_state(&self, project_key: ProjectKey, state: ProjectState) { + let is_pending = state.is_pending(); + self.shared.test_set_project_state(project_key, state); + if is_pending { + let _ = self + .project_changes + .send(ProjectChange::Evicted(project_key)); + } else { + let _ = self.project_changes.send(ProjectChange::Ready(project_key)); + } + } + + /// Returns `true` if there is a project created for this `project_key`. + /// + /// A project is automatically created on access via [`Self::get`]. + pub fn test_has_project_created(&self, project_key: ProjectKey) -> bool { + self.shared.test_has_project_created(project_key) + } + + /// The amount of fetches triggered for projects. + /// + /// A fetch is triggered for both [`Self::get`] and [`Self::fetch`]. 
+ pub fn test_num_fetches(&self) -> u64 { + self.service.len() + } + } +} diff --git a/relay-server/src/services/projects/cache.rs b/relay-server/src/services/projects/cache/legacy.rs similarity index 52% rename from relay-server/src/services/projects/cache.rs rename to relay-server/src/services/projects/cache/legacy.rs index d337ee63e9..0dc95464c1 100644 --- a/relay-server/src/services/projects/cache.rs +++ b/relay-server/src/services/projects/cache/legacy.rs @@ -3,146 +3,40 @@ use std::error::Error; use std::sync::Arc; use std::time::Duration; -use crate::extractors::RequestMeta; use crate::services::buffer::{EnvelopeBuffer, EnvelopeBufferError}; use crate::services::global_config; use crate::services::processor::{ - EncodeMetrics, EnvelopeProcessor, MetricData, ProcessEnvelope, ProcessingGroup, ProjectMetrics, + EncodeMetrics, EnvelopeProcessor, ProcessEnvelope, ProcessingGroup, ProjectMetrics, }; +use crate::services::projects::cache::{CheckedEnvelope, ProjectCacheHandle, ProjectChange}; use crate::Envelope; -use chrono::{DateTime, Utc}; use hashbrown::HashSet; use relay_base_schema::project::ProjectKey; use relay_config::Config; -use relay_metrics::Bucket; -use relay_quotas::RateLimits; -use relay_redis::RedisPool; use relay_statsd::metric; -use relay_system::{Addr, FromMessage, Interface, Sender, Service}; +use relay_system::{Addr, FromMessage, Interface, Service}; use tokio::sync::{mpsc, watch}; -use tokio::time::Instant; -use crate::services::metrics::{Aggregator, FlushBuckets}; +use crate::services::metrics::{Aggregator, FlushBuckets, MergeBuckets}; use crate::services::outcome::{DiscardReason, Outcome, TrackOutcome}; -use crate::services::projects::project::{Project, ProjectFetchState, ProjectSender, ProjectState}; -use crate::services::projects::source::ProjectSource; +use crate::services::projects::project::ProjectState; use crate::services::spooler::{ self, Buffer, BufferService, DequeueMany, Enqueue, QueueKey, RemoveMany, RestoreIndex, UnspooledEnvelope, BATCH_KEY_COUNT, }; use crate::services::test_store::TestStore; -use crate::services::upstream::UpstreamRelay; -use crate::statsd::{RelayCounters, RelayGauges, RelayHistograms, RelayTimers}; -use crate::utils::{GarbageDisposal, ManagedEnvelope, MemoryChecker, RetryBackoff, SleepHandle}; - -/// Requests a refresh of a project state from one of the available sources. -/// -/// The project state is resolved in the following precedence: -/// -/// 1. Local file system -/// 2. Redis cache (processing mode only) -/// 3. Upstream (managed and processing mode only) -/// -/// Requests to the upstream are performed via `UpstreamProjectSource`, which internally batches -/// individual requests. -#[derive(Clone, Debug)] -pub struct RequestUpdate { - /// The public key to fetch the project by. - pub project_key: ProjectKey, - /// If true, all caches should be skipped and a fresh state should be computed. - pub no_cache: bool, - /// Previously cached fetch state, if available. - /// - /// The upstream request will include the revision of the currently cached state, - /// if the upstream does not have a different revision, this cached - /// state is re-used and its expiry bumped. - pub cached_state: ProjectFetchState, -} - -/// Returns the project state. -/// -/// The project state is fetched if it is missing or outdated. If `no_cache` is specified, then the -/// state is always refreshed. 
-#[derive(Debug)] -pub struct GetProjectState { - project_key: ProjectKey, - no_cache: bool, -} - -impl GetProjectState { - /// Fetches the project state and uses the cached version if up-to-date. - pub fn new(project_key: ProjectKey) -> Self { - Self { - project_key, - no_cache: false, - } - } - - /// Fetches the project state and conditionally skips the cache. - pub fn no_cache(mut self, no_cache: bool) -> Self { - self.no_cache = no_cache; - self - } -} - -/// Returns the project state if it is already cached. -/// -/// This is used for cases when we only want to perform operations that do -/// not require waiting for network requests. -#[derive(Debug)] -pub struct GetCachedProjectState { - project_key: ProjectKey, -} - -impl GetCachedProjectState { - pub fn new(project_key: ProjectKey) -> Self { - Self { project_key } - } -} - -/// A checked envelope and associated rate limits. -/// -/// Items violating the rate limits have been removed from the envelope. If all items are removed -/// from the envelope, `None` is returned in place of the envelope. -#[derive(Debug)] -pub struct CheckedEnvelope { - pub envelope: Option, - pub rate_limits: RateLimits, -} - -/// Checks the envelope against project configuration and rate limits. -/// -/// When `fetched`, then the project state is ensured to be up to date. When `cached`, an outdated -/// project state may be used, or otherwise the envelope is passed through unaltered. -/// -/// To check the envelope, this runs: -/// - Validate origins and public keys -/// - Quotas with a limit of `0` -/// - Cached rate limits -#[derive(Debug)] -pub struct CheckEnvelope { - envelope: ManagedEnvelope, -} - -impl CheckEnvelope { - /// Uses a cached project state and checks the envelope. - pub fn new(envelope: ManagedEnvelope) -> Self { - Self { envelope } - } -} +use crate::statsd::{RelayCounters, RelayGauges, RelayTimers}; +use crate::utils::{ManagedEnvelope, MemoryChecker, RetryBackoff, SleepHandle}; /// Validates the envelope against project configuration and rate limits. /// -/// This ensures internally that the project state is up to date and then runs the same checks as -/// [`CheckEnvelope`]. Once the envelope has been validated, remaining items are forwarded to the -/// next stage: +/// This ensures internally that the project state is up to date . +/// Once the envelope has been validated, remaining items are forwarded to the next stage: /// /// - If the envelope needs dynamic sampling, and the project state is not cached or out of the /// date, the envelopes is spooled and we continue when the state is fetched. /// - Otherwise, the envelope is directly submitted to the [`EnvelopeProcessor`]. -/// -/// [`EnvelopeProcessor`]: crate::services::processor::EnvelopeProcessor #[derive(Debug)] pub struct ValidateEnvelope { envelope: ManagedEnvelope, @@ -154,66 +48,6 @@ impl ValidateEnvelope { } } -#[derive(Debug)] -pub struct UpdateRateLimits { - project_key: ProjectKey, - rate_limits: RateLimits, -} - -impl UpdateRateLimits { - pub fn new(project_key: ProjectKey, rate_limits: RateLimits) -> UpdateRateLimits { - Self { - project_key, - rate_limits, - } - } -} - -/// Source information where a metric bucket originates from. -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] -pub enum BucketSource { - /// The metric bucket originated from an internal Relay use case. 
- /// - /// The metric bucket originates either from within the same Relay - /// or was accepted coming from another Relay which is registered as - /// an internal Relay via Relay's configuration. - Internal, - /// The bucket source originated from an untrusted source. - /// - /// Managed Relays sending extracted metrics are considered external, - /// it's a project use case but it comes from an untrusted source. - External, -} - -impl From<&RequestMeta> for BucketSource { - fn from(value: &RequestMeta) -> Self { - if value.is_from_internal_relay() { - Self::Internal - } else { - Self::External - } - } -} - -/// Starts the processing flow for received metrics. -/// -/// Enriches the raw data with projcet information and forwards -/// the metrics using [`ProcessProjectMetrics`](crate::services::processor::ProcessProjectMetrics). -#[derive(Debug)] -pub struct ProcessMetrics { - /// A list of metric items. - pub data: MetricData, - /// The target project. - pub project_key: ProjectKey, - /// Whether to keep or reset the metric metadata. - pub source: BucketSource, - /// The wall clock time at which the request was received. - pub received_at: DateTime, - /// The value of the Envelope's [`sent_at`](crate::envelope::Envelope::sent_at) - /// header for clock drift correction. - pub sent_at: Option>, -} - /// Updates the buffer index for [`ProjectKey`] with the [`QueueKey`] keys. /// /// This message is sent from the project buffer in case of the error while fetching the data from @@ -239,59 +73,24 @@ pub struct RefreshIndexCache(pub HashSet); #[derive(Debug)] pub struct DequeuedEnvelope(pub Box); -/// A request to update a project, typically sent by the envelope buffer. -/// -/// This message is similar to [`GetProjectState`], except it has no `no_cache` option -/// and it does not send a response, but sends a signal back to the buffer instead. -pub struct UpdateProject(pub ProjectKey); - -/// A cache for [`ProjectState`]s. -/// -/// The project maintains information about organizations, projects, and project keys along with -/// settings required to ingest traffic to Sentry. Internally, it tries to keep information -/// up-to-date and automatically retires unused old data. -/// -/// To retrieve information from the cache, use [`GetProjectState`] for guaranteed up-to-date -/// information, or [`GetCachedProjectState`] for immediately available but potentially older -/// information. +/// The legacy project cache. /// -/// There are also higher-level operations, such as [`CheckEnvelope`] and [`ValidateEnvelope`] that -/// inspect contents of envelopes for ingestion, as well as [`ProcessMetrics`] to aggregate metrics -/// associated with a project. -/// -/// See the enumerated variants for a full list of available messages for this service. +/// It manages spool v1 and some remaining messages which handle project state. 
#[derive(Debug)] pub enum ProjectCache { - RequestUpdate(RequestUpdate), - Get(GetProjectState, ProjectSender), - GetCached(GetCachedProjectState, Sender), - CheckEnvelope( - CheckEnvelope, - Sender>, - ), ValidateEnvelope(ValidateEnvelope), - UpdateRateLimits(UpdateRateLimits), - ProcessMetrics(ProcessMetrics), FlushBuckets(FlushBuckets), UpdateSpoolIndex(UpdateSpoolIndex), RefreshIndexCache(RefreshIndexCache), - UpdateProject(ProjectKey), } impl ProjectCache { pub fn variant(&self) -> &'static str { match self { - Self::RequestUpdate(_) => "RequestUpdate", - Self::Get(_, _) => "Get", - Self::GetCached(_, _) => "GetCached", - Self::CheckEnvelope(_, _) => "CheckEnvelope", Self::ValidateEnvelope(_) => "ValidateEnvelope", - Self::UpdateRateLimits(_) => "UpdateRateLimits", - Self::ProcessMetrics(_) => "ProcessMetrics", Self::FlushBuckets(_) => "FlushBuckets", Self::UpdateSpoolIndex(_) => "UpdateSpoolIndex", Self::RefreshIndexCache(_) => "RefreshIndexCache", - Self::UpdateProject(_) => "UpdateProject", } } } @@ -314,41 +113,6 @@ impl FromMessage for ProjectCache { } } -impl FromMessage for ProjectCache { - type Response = relay_system::NoResponse; - - fn from_message(message: RequestUpdate, _: ()) -> Self { - Self::RequestUpdate(message) - } -} - -impl FromMessage for ProjectCache { - type Response = relay_system::BroadcastResponse; - - fn from_message(message: GetProjectState, sender: ProjectSender) -> Self { - Self::Get(message, sender) - } -} - -impl FromMessage for ProjectCache { - type Response = relay_system::AsyncResponse; - - fn from_message(message: GetCachedProjectState, sender: Sender) -> Self { - Self::GetCached(message, sender) - } -} - -impl FromMessage for ProjectCache { - type Response = relay_system::AsyncResponse>; - - fn from_message( - message: CheckEnvelope, - sender: Sender>, - ) -> Self { - Self::CheckEnvelope(message, sender) - } -} - impl FromMessage for ProjectCache { type Response = relay_system::NoResponse; @@ -357,22 +121,6 @@ impl FromMessage for ProjectCache { } } -impl FromMessage for ProjectCache { - type Response = relay_system::NoResponse; - - fn from_message(message: UpdateRateLimits, _: ()) -> Self { - Self::UpdateRateLimits(message) - } -} - -impl FromMessage for ProjectCache { - type Response = relay_system::NoResponse; - - fn from_message(message: ProcessMetrics, _: ()) -> Self { - Self::ProcessMetrics(message) - } -} - impl FromMessage for ProjectCache { type Response = relay_system::NoResponse; @@ -381,27 +129,6 @@ impl FromMessage for ProjectCache { } } -impl FromMessage for ProjectCache { - type Response = relay_system::NoResponse; - - fn from_message(message: UpdateProject, _: ()) -> Self { - let UpdateProject(project_key) = message; - Self::UpdateProject(project_key) - } -} - -/// Updates the cache with new project state information. -struct UpdateProjectState { - /// The public key to fetch the project by. - project_key: ProjectKey, - - /// New project state information. - state: ProjectFetchState, - - /// If true, all caches should be skipped and a fresh state should be computed. - no_cache: bool, -} - /// Holds the addresses of all services required for [`ProjectCache`]. #[derive(Debug, Clone)] pub struct Services { @@ -411,7 +138,6 @@ pub struct Services { pub outcome_aggregator: Addr, pub project_cache: Addr, pub test_store: Addr, - pub upstream_relay: Addr, } /// Main broker of the [`ProjectCacheService`]. 
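// With the `UpdateRateLimits` message removed, callers now merge new limits directly
// through the handle. A minimal sketch of the replacement pattern, assuming a
// `project_cache: ProjectCacheHandle` and an in-scope `scoping` and `rate_limits`:
//
//     project_cache
//         .get(scoping.project_key)
//         .rate_limits()
//         .merge(rate_limits);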
@@ -423,14 +149,7 @@ struct ProjectCacheBroker { config: Arc, memory_checker: MemoryChecker, services: Services, - // Need hashbrown because extract_if is not stable in std yet. - projects: hashbrown::HashMap, - /// Utility for disposing of expired project data in a background thread. - garbage_disposal: GarbageDisposal, - /// Source for fetching project states from the upstream or from disk. - source: ProjectSource, - /// Tx channel used to send the updated project state whenever requested. - state_tx: mpsc::UnboundedSender, + projects: ProjectCacheHandle, /// Handle to schedule periodic unspooling of buffered envelopes (spool V1). spool_v1_unspool_handle: SleepHandle, @@ -493,194 +212,35 @@ impl ProjectCacheBroker { .send(DequeueMany::new(keys, spool_v1.buffer_tx.clone())) } - /// Evict projects that are over its expiry date. - /// - /// Ideally, we would use `check_expiry` to determine expiry here. - /// However, for eviction, we want to add an additional delay, such that we do not delete - /// a project that has expired recently and for which a fetch is already underway in - /// [`super::source::upstream`]. - fn evict_stale_project_caches(&mut self) { - let eviction_start = Instant::now(); - let delta = 2 * self.config.project_cache_expiry() + self.config.project_grace_period(); - - let expired = self - .projects - .extract_if(|_, entry| entry.last_updated_at() + delta <= eviction_start); - - // Defer dropping the projects to a dedicated thread: - let mut count = 0; - for (project_key, project) in expired { - if let Some(spool_v1) = self.spool_v1.as_mut() { - let keys = spool_v1 - .index - .extract_if(|key| key.own_key == project_key || key.sampling_key == project_key) - .collect::>(); - - if !keys.is_empty() { - spool_v1.buffer.send(RemoveMany::new(project_key, keys)) - } - } - - self.garbage_disposal.dispose(project); - count += 1; - } - metric!(counter(RelayCounters::EvictingStaleProjectCaches) += count); - - // Log garbage queue size: - let queue_size = self.garbage_disposal.queue_size() as f64; - metric!(gauge(RelayGauges::ProjectCacheGarbageQueueSize) = queue_size); - - metric!(timer(RelayTimers::ProjectStateEvictionDuration) = eviction_start.elapsed()); - } - - fn get_or_create_project(&mut self, project_key: ProjectKey) -> &mut Project { - metric!(histogram(RelayHistograms::ProjectStateCacheSize) = self.projects.len() as u64); - - let config = self.config.clone(); - - self.projects - .entry(project_key) - .and_modify(|_| { - metric!(counter(RelayCounters::ProjectCacheHit) += 1); - }) - .or_insert_with(move || { - metric!(counter(RelayCounters::ProjectCacheMiss) += 1); - Project::new(project_key, config) - }) - } - - /// Updates the [`Project`] with received [`ProjectState`]. - /// - /// If the project state is valid we also send the message to the buffer service to dequeue the - /// envelopes for this project. - fn merge_state(&mut self, message: UpdateProjectState) { - let UpdateProjectState { - project_key, - state, - no_cache, - } = message; - - let project_cache = self.services.project_cache.clone(); - - let old_state = - self.get_or_create_project(project_key) - .update_state(&project_cache, state, no_cache); - - if let Some(old_state) = old_state { - self.garbage_disposal.dispose(old_state); - } - - // Try to schedule unspool if it's not scheduled yet. 
- self.schedule_unspool(); - - if let Some(envelope_buffer) = self.services.envelope_buffer.as_ref() { - envelope_buffer.send(EnvelopeBuffer::Ready(project_key)) + fn evict_project(&mut self, project_key: ProjectKey) { + let Some(ref mut spool_v1) = self.spool_v1 else { + return; }; - } - - fn handle_request_update(&mut self, message: RequestUpdate) { - let RequestUpdate { - project_key, - no_cache, - cached_state, - } = message; - // Bump the update time of the project in our hashmap to evade eviction. - let project = self.get_or_create_project(project_key); - project.refresh_updated_timestamp(); - let next_attempt = project.next_fetch_attempt(); + let keys = spool_v1 + .index + .extract_if(|key| key.own_key == project_key || key.sampling_key == project_key) + .collect::>(); - let source = self.source.clone(); - let sender = self.state_tx.clone(); - - tokio::spawn(async move { - // Wait on the new attempt time when set. - if let Some(next_attempt) = next_attempt { - tokio::time::sleep_until(next_attempt).await; - } - let state = source - .fetch(project_key, no_cache, cached_state) - .await - .unwrap_or_else(|e| { - relay_log::error!( - error = &e as &dyn Error, - tags.project_key = %project_key, - "Failed to fetch project from source" - ); - // TODO: change this to ProjectFetchState::pending() once we consider it safe to do so. - // see https://github.com/getsentry/relay/pull/4140. - ProjectFetchState::disabled() - }); - - let message = UpdateProjectState { - project_key, - no_cache, - state, - }; - - sender.send(message).ok(); - }); - } - - fn handle_get(&mut self, message: GetProjectState, sender: ProjectSender) { - let GetProjectState { - project_key, - no_cache, - } = message; - let project_cache = self.services.project_cache.clone(); - let project = self.get_or_create_project(project_key); - - project.get_state(project_cache, sender, no_cache); - } - - fn handle_get_cached(&mut self, message: GetCachedProjectState) -> ProjectState { - let project_cache = self.services.project_cache.clone(); - self.get_or_create_project(message.project_key) - .get_cached_state(project_cache, false) + if !keys.is_empty() { + spool_v1.buffer.send(RemoveMany::new(project_key, keys)) + } } - fn handle_check_envelope( - &mut self, - message: CheckEnvelope, - ) -> Result { - let CheckEnvelope { envelope: context } = message; - let project_cache = self.services.project_cache.clone(); - let project_key = context.envelope().meta().public_key(); - if let Some(sampling_key) = context.envelope().sampling_key() { - if sampling_key != project_key { - let sampling_project = self.get_or_create_project(sampling_key); - sampling_project.prefetch(project_cache.clone(), false); - } + fn handle_project_event(&mut self, event: ProjectChange) { + match event { + ProjectChange::Ready(_) => self.schedule_unspool(), + ProjectChange::Evicted(project_key) => self.evict_project(project_key), } - let project = self.get_or_create_project(project_key); - - // Preload the project cache so that it arrives a little earlier in processing. However, - // do not pass `no_cache`. In case the project is rate limited, we do not want to force - // a full reload. Fetching must not block the store request. - project.prefetch(project_cache, false); - - project.check_envelope(context) } /// Handles the processing of the provided envelope. 
- fn handle_processing(&mut self, key: QueueKey, mut managed_envelope: ManagedEnvelope) { + fn handle_processing(&mut self, mut managed_envelope: ManagedEnvelope) { let project_key = managed_envelope.envelope().meta().public_key(); - let Some(project) = self.projects.get_mut(&project_key) else { - relay_log::error!( - tags.project_key = %project_key, - "project could not be found in the cache", - ); - - let mut project = Project::new(project_key, self.config.clone()); - project.prefetch(self.services.project_cache.clone(), false); - self.projects.insert(project_key, project); - self.enqueue(key, managed_envelope); - return; - }; + let project = self.projects.get(project_key); - let project_cache = self.services.project_cache.clone(); - let project_info = match project.get_cached_state(project_cache.clone(), false) { + let project_info = match project.state() { ProjectState::Enabled(info) => info, ProjectState::Disabled => { managed_envelope.reject(Outcome::Invalid(DiscardReason::ProjectId)); @@ -707,20 +267,19 @@ impl ProjectCacheBroker { .. }) = project.check_envelope(managed_envelope) { - let rate_limits = project.current_rate_limits().clone(); - - let reservoir_counters = project.reservoir_counters(); + let rate_limits = project.rate_limits().current_limits(); + let reservoir_counters = project.reservoir_counters().clone(); let sampling_project_info = managed_envelope .envelope() .sampling_key() - .and_then(|key| self.projects.get_mut(&key)) - .and_then(|p| p.get_cached_state(project_cache, false).enabled()) - .filter(|state| state.organization_id == project_info.organization_id); + .map(|key| self.projects.get(key)) + .and_then(|p| p.state().clone().enabled()) + .filter(|info| info.organization_id == project_info.organization_id); let process = ProcessEnvelope { envelope: managed_envelope, - project_info, + project_info: Arc::clone(project_info), rate_limits, sampling_project_info, reservoir_counters, @@ -748,17 +307,13 @@ impl ProjectCacheBroker { envelope: mut managed_envelope, } = message; - let project_cache = self.services.project_cache.clone(); let envelope = managed_envelope.envelope(); // Fetch the project state for our key and make sure it's not invalid. let own_key = envelope.meta().public_key(); - let project = self.get_or_create_project(own_key); + let project = self.projects.get(own_key); - let project_state = - project.get_cached_state(project_cache.clone(), envelope.meta().no_cache()); - - let project_state = match project_state { + let project_state = match project.state() { ProjectState::Enabled(state) => Some(state), ProjectState::Disabled => { managed_envelope.reject(Outcome::Invalid(DiscardReason::ProjectId)); @@ -770,12 +325,10 @@ impl ProjectCacheBroker { // Also, fetch the project state for sampling key and make sure it's not invalid. let sampling_key = envelope.sampling_key(); let mut requires_sampling_state = sampling_key.is_some(); - let sampling_state = if let Some(sampling_key) = sampling_key { - let state = self - .get_or_create_project(sampling_key) - .get_cached_state(project_cache, envelope.meta().no_cache()); - match state { - ProjectState::Enabled(state) => Some(state), + let sampling_info = if let Some(sampling_key) = sampling_key { + let sampling_project = self.projects.get(sampling_key); + match sampling_project.state() { + ProjectState::Enabled(info) => Some(Arc::clone(info)), ProjectState::Disabled => { relay_log::trace!("Sampling state is disabled ({sampling_key})"); // We accept events even if its root project has been disabled. 
@@ -796,50 +349,33 @@ impl ProjectCacheBroker { // Trigger processing once we have a project state and we either have a sampling project // state or we do not need one. if project_state.is_some() - && (sampling_state.is_some() || !requires_sampling_state) + && (sampling_info.is_some() || !requires_sampling_state) && self.memory_checker.check_memory().has_capacity() && self.global_config.is_ready() { // TODO: Add ready project infos to the processing message. relay_log::trace!("Sending envelope to processor"); - return self.handle_processing(key, managed_envelope); + return self.handle_processing(managed_envelope); } relay_log::trace!("Enqueueing envelope"); self.enqueue(key, managed_envelope); } - fn handle_rate_limits(&mut self, message: UpdateRateLimits) { - self.get_or_create_project(message.project_key) - .merge_rate_limits(message.rate_limits); - } - - fn handle_process_metrics(&mut self, message: ProcessMetrics) { - let project_cache = self.services.project_cache.clone(); - - let message = self - .get_or_create_project(message.project_key) - .prefetch(project_cache, false) - .process_metrics(message); - - self.services.envelope_processor.send(message); - } - fn handle_flush_buckets(&mut self, message: FlushBuckets) { let aggregator = self.services.aggregator.clone(); - let project_cache = self.services.project_cache.clone(); let mut no_project = 0; let mut scoped_buckets = BTreeMap::new(); for (project_key, buckets) in message.buckets { - let project = self.get_or_create_project(project_key); + let project = self.projects.get(project_key); - let project_info = match project.current_state() { + let project_info = match project.state() { ProjectState::Pending => { no_project += 1; - // Schedule an update for the project just in case. - project.prefetch(project_cache.clone(), false); - project.return_buckets(&aggregator, buckets); + + // Return the buckets to the aggregator. + aggregator.send(MergeBuckets::new(project_key, buckets)); continue; } ProjectState::Disabled => { @@ -848,13 +384,12 @@ impl ProjectCacheBroker { // Ideally we log outcomes for the metrics here, but currently for metric // outcomes we need a valid scoping, which we cannot construct for disabled // projects. 
- self.garbage_disposal.dispose(buckets); continue; } ProjectState::Enabled(project_info) => project_info, }; - let Some(scoping) = project.scoping() else { + let Some(scoping) = project_info.scoping(project_key) else { relay_log::error!( tags.project_key = project_key.as_str(), "there is no scoping: dropping {} buckets", @@ -867,8 +402,8 @@ impl ProjectCacheBroker { match scoped_buckets.entry(scoping) { Vacant(entry) => { entry.insert(ProjectMetrics { - project_info, - rate_limits: project.current_rate_limits().clone(), + project_info: Arc::clone(project_info), + rate_limits: project.rate_limits().current_limits(), buckets, }); } @@ -895,16 +430,14 @@ impl ProjectCacheBroker { fn handle_refresh_index_cache(&mut self, message: RefreshIndexCache) { let RefreshIndexCache(index) = message; - let project_cache = self.services.project_cache.clone(); for key in index { let spool_v1 = self.spool_v1.as_mut().expect("no V1 spool configured"); spool_v1.index.insert(key); - self.get_or_create_project(key.own_key) - .prefetch(project_cache.clone(), false); + + self.projects.fetch(key.own_key); if key.own_key != key.sampling_key { - self.get_or_create_project(key.sampling_key) - .prefetch(project_cache.clone(), false); + self.projects.fetch(key.sampling_key); } } } @@ -918,11 +451,10 @@ impl ProjectCacheBroker { let services = self.services.clone(); let own_key = envelope.meta().public_key(); - let project = self.get_or_create_project(own_key); - let project_state = project.get_cached_state(services.project_cache.clone(), false); + let project = self.projects.get(own_key); // Check if project config is enabled. - let project_info = match project_state { + let project_info = match project.state() { ProjectState::Enabled(info) => info, ProjectState::Disabled => { let mut managed_envelope = ManagedEnvelope::new( @@ -944,8 +476,7 @@ impl ProjectCacheBroker { let sampling_project_info = match sampling_key.map(|sampling_key| { ( sampling_key, - self.get_or_create_project(sampling_key) - .get_cached_state(services.project_cache, false), + self.projects.get(sampling_key).state().clone(), ) }) { Some((_, ProjectState::Enabled(info))) => { @@ -964,8 +495,6 @@ impl ProjectCacheBroker { None => None, }; - let project = self.get_or_create_project(own_key); - // Reassign processing groups and proceed to processing. for (group, envelope) in ProcessingGroup::split_envelope(*envelope) { let managed_envelope = ManagedEnvelope::new( @@ -983,11 +512,11 @@ impl ProjectCacheBroker { continue; // Outcomes are emitted by check_envelope }; - let reservoir_counters = project.reservoir_counters(); + let reservoir_counters = project.reservoir_counters().clone(); services.envelope_processor.send(ProcessEnvelope { envelope: managed_envelope, project_info: project_info.clone(), - rate_limits: project.current_rate_limits().clone(), + rate_limits: project.rate_limits().current_limits(), sampling_project_info: sampling_project_info.clone(), reservoir_counters, }); @@ -996,22 +525,6 @@ impl ProjectCacheBroker { Ok(()) } - fn handle_update_project(&mut self, project_key: ProjectKey) { - let project_cache = self.services.project_cache.clone(); - let envelope_buffer = self.services.envelope_buffer.clone(); - let project = self.get_or_create_project(project_key); - - // If the project is already loaded, inform the envelope buffer. 
- if !project.current_state().is_pending() { - if let Some(envelope_buffer) = envelope_buffer { - envelope_buffer.send(EnvelopeBuffer::Ready(project_key)); - } - } - - let no_cache = false; - project.prefetch(project_cache, no_cache); - } - /// Returns backoff timeout for an unspool attempt. fn next_unspool_attempt(&mut self) -> Duration { let spool_v1 = self.spool_v1.as_mut().expect("no V1 spool configured"); @@ -1036,15 +549,9 @@ impl ProjectCacheBroker { /// Which includes the own key and the sampling key for the project. /// Note: this function will trigger [`ProjectState`] refresh if it's already expired. fn is_state_cached(&mut self, key: &QueueKey) -> bool { - key.unique_keys().iter().all(|key| { - self.projects.get_mut(key).is_some_and(|project| { - // Returns `Some` if the project is cached otherwise None and also triggers refresh - // in background. - !project - .get_cached_state(self.services.project_cache.clone(), false) - .is_pending() - }) - }) + key.unique_keys() + .iter() + .all(|key| !self.projects.get(*key).state().is_pending()) } /// Iterates the buffer index and tries to unspool the envelopes for projects with a valid @@ -1104,29 +611,18 @@ impl ProjectCacheBroker { fn handle_message(&mut self, message: ProjectCache) { let ty = message.variant(); metric!( - timer(RelayTimers::ProjectCacheMessageDuration), + timer(RelayTimers::LegacyProjectCacheMessageDuration), message = ty, { match message { - ProjectCache::RequestUpdate(message) => self.handle_request_update(message), - ProjectCache::Get(message, sender) => self.handle_get(message, sender), - ProjectCache::GetCached(message, sender) => { - sender.send(self.handle_get_cached(message)) - } - ProjectCache::CheckEnvelope(message, sender) => { - sender.send(self.handle_check_envelope(message)) - } ProjectCache::ValidateEnvelope(message) => { self.handle_validate_envelope(message) } - ProjectCache::UpdateRateLimits(message) => self.handle_rate_limits(message), - ProjectCache::ProcessMetrics(message) => self.handle_process_metrics(message), ProjectCache::FlushBuckets(message) => self.handle_flush_buckets(message), ProjectCache::UpdateSpoolIndex(message) => self.handle_buffer_index(message), ProjectCache::RefreshIndexCache(message) => { self.handle_refresh_index_cache(message) } - ProjectCache::UpdateProject(project) => self.handle_update_project(project), } } ) @@ -1153,11 +649,11 @@ impl ProjectCacheBroker { pub struct ProjectCacheService { config: Arc, memory_checker: MemoryChecker, + project_cache_handle: ProjectCacheHandle, services: Services, global_config_rx: watch::Receiver, /// Bounded channel used exclusively to receive envelopes from the envelope buffer. 
envelopes_rx: mpsc::Receiver, - redis: Option, } impl ProjectCacheService { @@ -1165,18 +661,18 @@ impl ProjectCacheService { pub fn new( config: Arc, memory_checker: MemoryChecker, + project_cache_handle: ProjectCacheHandle, services: Services, global_config_rx: watch::Receiver, envelopes_rx: mpsc::Receiver, - redis: Option, ) -> Self { Self { config, memory_checker, + project_cache_handle, services, global_config_rx, envelopes_rx, - redis, } } } @@ -1188,22 +684,19 @@ impl Service for ProjectCacheService { let Self { config, memory_checker, + project_cache_handle, services, mut global_config_rx, mut envelopes_rx, - redis, } = self; + let mut project_events = project_cache_handle.changes(); let project_cache = services.project_cache.clone(); let outcome_aggregator = services.outcome_aggregator.clone(); let test_store = services.test_store.clone(); tokio::spawn(async move { - let mut ticker = tokio::time::interval(config.cache_eviction_interval()); relay_log::info!("project cache started"); - // Channel for async project state responses back into the project cache. - let (state_tx, mut state_rx) = mpsc::unbounded_channel(); - let global_config = match global_config_rx.borrow().clone() { global_config::Status::Ready(_) => { relay_log::info!("global config received"); @@ -1256,20 +749,11 @@ impl Service for ProjectCacheService { }), }; - // Main broker that serializes public and internal messages, and triggers project state - // fetches via the project source. let mut broker = ProjectCacheBroker { config: config.clone(), memory_checker, - projects: hashbrown::HashMap::new(), - garbage_disposal: GarbageDisposal::new(), - source: ProjectSource::start( - config.clone(), - services.upstream_relay.clone(), - redis, - ), + projects: project_cache_handle, services, - state_tx, spool_v1_unspool_handle: SleepHandle::idle(), spool_v1, global_config, @@ -1280,7 +764,7 @@ impl Service for ProjectCacheService { biased; Ok(()) = global_config_rx.changed() => { - metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "update_global_config", { + metric!(timer(RelayTimers::LegacyProjectCacheTaskDuration), task = "update_global_config", { match global_config_rx.borrow().clone() { global_config::Status::Ready(_) => broker.set_global_config_ready(), // The watch should only be updated if it gets a new value. @@ -1289,35 +773,30 @@ impl Service for ProjectCacheService { } }) }, - Some(message) = state_rx.recv() => { - metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "merge_state", { - broker.merge_state(message) + Ok(project_event) = project_events.recv() => { + metric!(timer(RelayTimers::LegacyProjectCacheTaskDuration), task = "handle_project_event", { + broker.handle_project_event(project_event); }) } // Buffer will not dequeue the envelopes from the spool if there is not enough // permits in `BufferGuard` available. Currently this is 50%. - Some(UnspooledEnvelope { managed_envelope, key }) = buffer_rx.recv() => { - metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "handle_processing", { - broker.handle_processing(key, managed_envelope) + Some(UnspooledEnvelope { managed_envelope, .. 
}) = buffer_rx.recv() => { + metric!(timer(RelayTimers::LegacyProjectCacheTaskDuration), task = "handle_processing", { + broker.handle_processing(managed_envelope) }) }, - _ = ticker.tick() => { - metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "evict_project_caches", { - broker.evict_stale_project_caches() - }) - } () = &mut broker.spool_v1_unspool_handle => { - metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "periodic_unspool", { + metric!(timer(RelayTimers::LegacyProjectCacheTaskDuration), task = "periodic_unspool", { broker.handle_periodic_unspool() }) } Some(message) = rx.recv() => { - metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "handle_message", { + metric!(timer(RelayTimers::LegacyProjectCacheTaskDuration), task = "handle_message", { broker.handle_message(message) }) } Some(message) = envelopes_rx.recv() => { - metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "handle_envelope", { + metric!(timer(RelayTimers::LegacyProjectCacheTaskDuration), task = "handle_envelope", { broker.handle_envelope(message) }) } @@ -1330,33 +809,6 @@ impl Service for ProjectCacheService { } } -/// Sum type for all objects which need to be discareded through the [`GarbageDisposal`]. -#[derive(Debug)] -#[allow(dead_code)] // Fields are never read, only used for discarding/dropping data. -enum ProjectGarbage { - Project(Project), - ProjectFetchState(ProjectFetchState), - Metrics(Vec), -} - -impl From for ProjectGarbage { - fn from(value: Project) -> Self { - Self::Project(value) - } -} - -impl From for ProjectGarbage { - fn from(value: ProjectFetchState) -> Self { - Self::ProjectFetchState(value) - } -} - -impl From> for ProjectGarbage { - fn from(value: Vec) -> Self { - Self::Metrics(value) - } -} - #[cfg(test)] mod tests { use relay_test::mock_service; @@ -1375,7 +827,6 @@ mod tests { let (outcome_aggregator, _) = mock_service("outcome_aggregator", (), |&mut (), _| {}); let (project_cache, _) = mock_service("project_cache", (), |&mut (), _| {}); let (test_store, _) = mock_service("test_store", (), |&mut (), _| {}); - let (upstream_relay, _) = mock_service("upstream_relay", (), |&mut (), _| {}); Services { envelope_buffer: None, @@ -1384,13 +835,11 @@ mod tests { project_cache, outcome_aggregator, test_store, - upstream_relay, } } async fn project_cache_broker_setup( services: Services, - state_tx: mpsc::UnboundedSender, buffer_tx: mpsc::UnboundedSender, ) -> (ProjectCacheBroker, Addr) { let config: Arc<_> = Config::from_json_value(serde_json::json!({ @@ -1429,11 +878,8 @@ mod tests { ProjectCacheBroker { config: config.clone(), memory_checker, - projects: hashbrown::HashMap::new(), - garbage_disposal: GarbageDisposal::new(), - source: ProjectSource::start(config, services.upstream_relay.clone(), None), + projects: ProjectCacheHandle::for_test(), services, - state_tx, spool_v1_unspool_handle: SleepHandle::idle(), spool_v1: Some(SpoolV1 { buffer_tx, @@ -1452,13 +898,13 @@ mod tests { relay_log::init_test!(); let services = mocked_services(); - let (state_tx, _) = mpsc::unbounded_channel(); let (buffer_tx, mut buffer_rx) = mpsc::unbounded_channel(); let (mut broker, _buffer_svc) = - project_cache_broker_setup(services.clone(), state_tx, buffer_tx).await; + project_cache_broker_setup(services.clone(), buffer_tx).await; + let projects = broker.projects.clone(); + let mut project_events = projects.changes(); broker.global_config = GlobalConfigStatus::Ready; - let (tx_update, mut rx_update) = mpsc::unbounded_channel(); let (tx_assert, mut rx_assert) = 
mpsc::unbounded_channel(); let dsn1 = "111d836b15bb49d7bbf99e64295d995b"; @@ -1485,11 +931,12 @@ mod tests { tokio::task::spawn(async move { loop { select! { - + Ok(project_event) = project_events.recv() => { + broker.handle_project_event(project_event); + } Some(assert) = rx_assert.recv() => { assert_eq!(broker.spool_v1.as_ref().unwrap().index.len(), assert); }, - Some(update) = rx_update.recv() => broker.merge_state(update), () = &mut broker.spool_v1_unspool_handle => broker.handle_periodic_unspool(), } } @@ -1498,13 +945,11 @@ mod tests { // Before updating any project states. tx_assert.send(2).unwrap(); - let update_dsn1_project_state = UpdateProjectState { - project_key: ProjectKey::parse(dsn1).unwrap(), - state: ProjectFetchState::allowed(), - no_cache: false, - }; + projects.test_set_project_state( + ProjectKey::parse(dsn1).unwrap(), + ProjectState::new_allowed(), + ); - tx_update.send(update_dsn1_project_state).unwrap(); assert!(buffer_rx.recv().await.is_some()); // One of the project should be unspooled. tx_assert.send(1).unwrap(); @@ -1512,59 +957,15 @@ mod tests { // Schedule some work... tokio::time::sleep(Duration::from_secs(2)).await; - let update_dsn2_project_state = UpdateProjectState { - project_key: ProjectKey::parse(dsn2).unwrap(), - state: ProjectFetchState::allowed(), - no_cache: false, - }; + projects.test_set_project_state( + ProjectKey::parse(dsn2).unwrap(), + ProjectState::new_allowed(), + ); - tx_update.send(update_dsn2_project_state).unwrap(); assert!(buffer_rx.recv().await.is_some()); // The last project should be unspooled. tx_assert.send(0).unwrap(); // Make sure the last assert is tested. tokio::time::sleep(Duration::from_millis(100)).await; } - - #[tokio::test] - async fn handle_processing_without_project() { - let services = mocked_services(); - let (state_tx, _) = mpsc::unbounded_channel(); - let (buffer_tx, mut buffer_rx) = mpsc::unbounded_channel(); - let (mut broker, buffer_svc) = - project_cache_broker_setup(services.clone(), state_tx, buffer_tx.clone()).await; - - let dsn = "111d836b15bb49d7bbf99e64295d995b"; - let project_key = ProjectKey::parse(dsn).unwrap(); - let key = QueueKey { - own_key: project_key, - sampling_key: project_key, - }; - let envelope = ManagedEnvelope::new( - empty_envelope_with_dsn(dsn), - services.outcome_aggregator.clone(), - services.test_store.clone(), - ProcessingGroup::Ungrouped, - ); - - // Index and projects are empty. - assert!(broker.projects.is_empty()); - assert!(broker.spool_v1.as_mut().unwrap().index.is_empty()); - - // Since there is no project we should not process anything but create a project and spool - // the envelope. - broker.handle_processing(key, envelope); - - // Assert that we have a new project and also added an index. - assert!(broker.projects.get(&project_key).is_some()); - assert!(broker.spool_v1.as_mut().unwrap().index.contains(&key)); - - // Check is we actually spooled anything. - buffer_svc.send(DequeueMany::new([key].into(), buffer_tx.clone())); - let UnspooledEnvelope { - managed_envelope, .. 
- } = buffer_rx.recv().await.unwrap(); - - assert_eq!(key, QueueKey::from_envelope(managed_envelope.envelope())); - } } diff --git a/relay-server/src/services/projects/cache/mod.rs b/relay-server/src/services/projects/cache/mod.rs new file mode 100644 index 0000000000..35ee749ecc --- /dev/null +++ b/relay-server/src/services/projects/cache/mod.rs @@ -0,0 +1,10 @@ +mod handle; +mod project; +mod service; +mod state; + +pub mod legacy; + +pub use self::handle::ProjectCacheHandle; +pub use self::project::{CheckedEnvelope, Project}; +pub use self::service::{ProjectCache, ProjectCacheService, ProjectChange}; diff --git a/relay-server/src/services/projects/cache/project.rs b/relay-server/src/services/projects/cache/project.rs new file mode 100644 index 0000000000..4b9e95a2be --- /dev/null +++ b/relay-server/src/services/projects/cache/project.rs @@ -0,0 +1,299 @@ +use std::sync::Arc; + +use relay_config::Config; +use relay_dynamic_config::Feature; +use relay_quotas::{CachedRateLimits, DataCategory, MetricNamespaceScoping, RateLimits}; +use relay_sampling::evaluation::ReservoirCounters; + +use crate::envelope::ItemType; +use crate::services::outcome::{DiscardReason, Outcome}; +use crate::services::projects::cache::state::SharedProject; +use crate::services::projects::project::ProjectState; +use crate::utils::{CheckLimits, Enforcement, EnvelopeLimiter, ManagedEnvelope}; + +/// A loaded project. +pub struct Project<'a> { + shared: SharedProject, + config: &'a Config, +} + +impl<'a> Project<'a> { + pub(crate) fn new(shared: SharedProject, config: &'a Config) -> Self { + Self { shared, config } + } + + /// Returns a reference to the currently cached project state. + pub fn state(&self) -> &ProjectState { + self.shared.project_state() + } + + /// Returns a reference to the currently cached rate limits. + pub fn rate_limits(&self) -> &CachedRateLimits { + self.shared.cached_rate_limits() + } + + /// Returns a reference to the currently reservoir counters. + pub fn reservoir_counters(&self) -> &ReservoirCounters { + self.shared.reservoir_counters() + } + + /// Checks the envelope against project configuration and rate limits. + /// + /// When `fetched`, then the project state is ensured to be up to date. When `cached`, an outdated + /// project state may be used, or otherwise the envelope is passed through unaltered. + /// + /// To check the envelope, this runs: + /// - Validate origins and public keys + /// - Quotas with a limit of `0` + /// - Cached rate limits + pub fn check_envelope( + &self, + mut envelope: ManagedEnvelope, + ) -> Result { + let state = match self.state() { + ProjectState::Enabled(state) => Some(Arc::clone(state)), + ProjectState::Disabled => { + // TODO(jjbayer): We should refactor this function to either return a Result or + // handle envelope rejections internally, but not both. 
+ envelope.reject(Outcome::Invalid(DiscardReason::ProjectId)); + return Err(DiscardReason::ProjectId); + } + ProjectState::Pending => None, + }; + + let mut scoping = envelope.scoping(); + + if let Some(ref state) = state { + scoping = state.scope_request(envelope.envelope().meta()); + envelope.scope(scoping); + + if let Err(reason) = state.check_envelope(envelope.envelope(), self.config) { + envelope.reject(Outcome::Invalid(reason)); + return Err(reason); + } + } + + let current_limits = self.rate_limits().current_limits(); + + let quotas = state.as_deref().map(|s| s.get_quotas()).unwrap_or(&[]); + let envelope_limiter = EnvelopeLimiter::new(CheckLimits::NonIndexed, |item_scoping, _| { + Ok(current_limits.check_with_quotas(quotas, item_scoping)) + }); + + let (mut enforcement, mut rate_limits) = + envelope_limiter.compute(envelope.envelope_mut(), &scoping)?; + + let check_nested_spans = state + .as_ref() + .is_some_and(|s| s.has_feature(Feature::ExtractSpansFromEvent)); + + // If we can extract spans from the event, we want to try and count the number of nested + // spans to correctly emit negative outcomes in case the transaction itself is dropped. + if check_nested_spans { + sync_spans_to_enforcement(&envelope, &mut enforcement); + } + + enforcement.apply_with_outcomes(&mut envelope); + + envelope.update(); + + // Special case: Expose active rate limits for all metric namespaces if there is at least + // one metrics item in the Envelope to communicate backoff to SDKs. This is necessary + // because `EnvelopeLimiter` cannot not check metrics without parsing item contents. + if envelope.envelope().items().any(|i| i.ty().is_metrics()) { + let mut metrics_scoping = scoping.item(DataCategory::MetricBucket); + metrics_scoping.namespace = MetricNamespaceScoping::Any; + rate_limits.merge(current_limits.check_with_quotas(quotas, metrics_scoping)); + } + + let envelope = if envelope.envelope().is_empty() { + // Individual rate limits have already been issued above + envelope.reject(Outcome::RateLimited(None)); + None + } else { + Some(envelope) + }; + + Ok(CheckedEnvelope { + envelope, + rate_limits, + }) + } +} + +/// A checked envelope and associated rate limits. +/// +/// Items violating the rate limits have been removed from the envelope. If all items are removed +/// from the envelope, `None` is returned in place of the envelope. +#[derive(Debug)] +pub struct CheckedEnvelope { + pub envelope: Option, + pub rate_limits: RateLimits, +} + +/// Adds category limits for the nested spans inside a transaction. +/// +/// On the fast path of rate limiting, we do not have nested spans of a transaction extracted +/// as top-level spans, thus if we limited a transaction, we want to count and emit negative +/// outcomes for each of the spans nested inside that transaction. +fn sync_spans_to_enforcement(envelope: &ManagedEnvelope, enforcement: &mut Enforcement) { + if !enforcement.is_event_active() { + return; + } + + let spans_count = count_nested_spans(envelope); + if spans_count == 0 { + return; + } + + if enforcement.event.is_active() { + enforcement.spans = enforcement.event.clone_for(DataCategory::Span, spans_count); + } + + if enforcement.event_indexed.is_active() { + enforcement.spans_indexed = enforcement + .event_indexed + .clone_for(DataCategory::SpanIndexed, spans_count); + } +} + +/// Counts the nested spans inside the first transaction envelope item inside the [`Envelope`](crate::envelope::Envelope). 
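+///
+/// For example, a transaction payload with two entries in its `spans` array yields a
+/// count of `3`: the two nested spans plus the transaction itself, which is also
+/// extracted as a span and counted on the slow path of rate limiting.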
+fn count_nested_spans(envelope: &ManagedEnvelope) -> usize { + #[derive(Debug, serde::Deserialize)] + struct PartialEvent { + spans: crate::utils::SeqCount, + } + + envelope + .envelope() + .items() + .find(|item| *item.ty() == ItemType::Transaction && !item.spans_extracted()) + .and_then(|item| serde_json::from_slice::(&item.payload()).ok()) + // We do + 1, since we count the transaction itself because it will be extracted + // as a span and counted during the slow path of rate limiting. + .map_or(0, |event| event.spans.0 + 1) +} + +#[cfg(test)] +mod tests { + use crate::envelope::{ContentType, Envelope, Item}; + use crate::extractors::RequestMeta; + use crate::services::processor::ProcessingGroup; + use crate::services::projects::project::{ProjectInfo, PublicKeyConfig}; + use relay_base_schema::project::{ProjectId, ProjectKey}; + use relay_event_schema::protocol::EventId; + use serde_json::json; + use smallvec::smallvec; + + use super::*; + + fn create_project(config: &Config, data: Option) -> Project<'_> { + let mut project_info = ProjectInfo { + project_id: Some(ProjectId::new(42)), + ..Default::default() + }; + project_info.public_keys = smallvec![PublicKeyConfig { + public_key: ProjectKey::parse("e12d836b15bb49d7bbf99e64295d995b").unwrap(), + numeric_id: None, + }]; + + if let Some(data) = data { + project_info.config = serde_json::from_value(data).unwrap(); + } + + Project::new( + SharedProject::for_test(ProjectState::Enabled(project_info.into())), + config, + ) + } + + fn request_meta() -> RequestMeta { + let dsn = "https://e12d836b15bb49d7bbf99e64295d995b:@sentry.io/42" + .parse() + .unwrap(); + + RequestMeta::new(dsn) + } + + #[test] + fn test_track_nested_spans_outcomes() { + let config = Default::default(); + let project = create_project( + &config, + Some(json!({ + "features": [ + "organizations:indexed-spans-extraction" + ], + "quotas": [{ + "id": "foo", + "categories": ["transaction"], + "window": 3600, + "limit": 0, + "reasonCode": "foo", + }] + })), + ); + + let mut envelope = Envelope::from_request(Some(EventId::new()), request_meta()); + + let mut transaction = Item::new(ItemType::Transaction); + transaction.set_payload( + ContentType::Json, + r#"{ + "event_id": "52df9022835246eeb317dbd739ccd059", + "type": "transaction", + "transaction": "I have a stale timestamp, but I'm recent!", + "start_timestamp": 1, + "timestamp": 2, + "contexts": { + "trace": { + "trace_id": "ff62a8b040f340bda5d830223def1d81", + "span_id": "bd429c44b67a3eb4" + } + }, + "spans": [ + { + "span_id": "bd429c44b67a3eb4", + "start_timestamp": 1, + "timestamp": null, + "trace_id": "ff62a8b040f340bda5d830223def1d81" + }, + { + "span_id": "bd429c44b67a3eb5", + "start_timestamp": 1, + "timestamp": null, + "trace_id": "ff62a8b040f340bda5d830223def1d81" + } + ] +}"#, + ); + + envelope.add_item(transaction); + + let (outcome_aggregator, mut outcome_aggregator_rx) = relay_system::Addr::custom(); + let (test_store, _) = relay_system::Addr::custom(); + + let managed_envelope = ManagedEnvelope::new( + envelope, + outcome_aggregator.clone(), + test_store, + ProcessingGroup::Transaction, + ); + + project.check_envelope(managed_envelope).unwrap(); + drop(outcome_aggregator); + + let expected = [ + (DataCategory::Transaction, 1), + (DataCategory::TransactionIndexed, 1), + (DataCategory::Span, 3), + (DataCategory::SpanIndexed, 3), + ]; + + for (expected_category, expected_quantity) in expected { + let outcome = outcome_aggregator_rx.blocking_recv().unwrap(); + assert_eq!(outcome.category, expected_category); + 
assert_eq!(outcome.quantity, expected_quantity); + } + } +} diff --git a/relay-server/src/services/projects/cache/service.rs b/relay-server/src/services/projects/cache/service.rs new file mode 100644 index 0000000000..23a21e553c --- /dev/null +++ b/relay-server/src/services/projects/cache/service.rs @@ -0,0 +1,225 @@ +use std::sync::Arc; + +use relay_base_schema::project::ProjectKey; +use relay_config::Config; +use relay_statsd::metric; +use relay_system::Service; +use tokio::sync::{broadcast, mpsc}; + +use crate::services::projects::cache::handle::ProjectCacheHandle; +use crate::services::projects::cache::state::{CompletedFetch, Fetch, ProjectStore}; +use crate::services::projects::project::ProjectState; +use crate::services::projects::source::ProjectSource; +use crate::statsd::{RelayGauges, RelayTimers}; + +/// Size of the broadcast channel for project events. +/// +/// This is set to a value which theoretically should never be reachable, +/// the number of events is approximately bounded by the amount of projects +/// receiving events. +/// +/// It is set to such a large amount because receivers of events currently +/// do not deal with lags in the channel gracefully. +const PROJECT_EVENTS_CHANNEL_SIZE: usize = 512_000; + +/// A cache for projects, which allows concurrent access to the cached projects. +#[derive(Debug)] +pub enum ProjectCache { + /// Schedules another fetch or update for the specified project. + /// + /// A project which is not fetched will eventually expire and be evicted + /// from the cache. Fetches for an already cached project ensure the project + /// is always up to date and not evicted. + Fetch(ProjectKey), +} + +impl ProjectCache { + fn variant(&self) -> &'static str { + match self { + Self::Fetch(_) => "fetch", + } + } +} + +impl relay_system::Interface for ProjectCache {} + +impl relay_system::FromMessage for ProjectCache { + type Response = relay_system::NoResponse; + + fn from_message(message: Self, _: ()) -> Self { + message + } +} + +/// Project life-cycle changes produced by the project cache. +#[derive(Debug, Copy, Clone)] +pub enum ProjectChange { + /// A project was successfully fetched and is now ready to use. + Ready(ProjectKey), + /// A project expired from the cache and was evicted. + Evicted(ProjectKey), +} + +/// A service implementing the [`ProjectCache`] interface. +pub struct ProjectCacheService { + store: ProjectStore, + source: ProjectSource, + config: Arc, + + project_update_rx: mpsc::UnboundedReceiver, + project_update_tx: mpsc::UnboundedSender, + + project_events_tx: broadcast::Sender, +} + +impl ProjectCacheService { + /// Creates a new [`ProjectCacheService`]. + pub fn new(config: Arc, source: ProjectSource) -> Self { + let (project_update_tx, project_update_rx) = mpsc::unbounded_channel(); + let project_events_tx = broadcast::channel(PROJECT_EVENTS_CHANNEL_SIZE).0; + + Self { + store: ProjectStore::default(), + source, + config, + project_update_rx, + project_update_tx, + project_events_tx, + } + } + + /// Consumes and starts a [`ProjectCacheService`]. + /// + /// Returns a [`ProjectCacheHandle`] to access the cache concurrently. 
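+    ///
+    /// A minimal wiring sketch, assuming `config` and `source` have already been
+    /// constructed:
+    ///
+    /// ```ignore
+    /// let handle = ProjectCacheService::new(Arc::clone(&config), source).start();
+    /// // The handle is cheap to clone and can be shared with other services.
+    /// let project = handle.get(project_key);
+    /// ```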
+ pub fn start(self) -> ProjectCacheHandle { + let (addr, addr_rx) = relay_system::channel(Self::name()); + + let handle = ProjectCacheHandle { + shared: self.store.shared(), + config: Arc::clone(&self.config), + service: addr, + project_changes: self.project_events_tx.clone(), + }; + + self.spawn_handler(addr_rx); + + handle + } + + /// Schedules a new [`Fetch`] and delivers the result to the [`Self::project_update_tx`] channel. + fn schedule_fetch(&self, fetch: Fetch) { + let source = self.source.clone(); + let project_updates = self.project_update_tx.clone(); + + tokio::spawn(async move { + tokio::time::sleep_until(fetch.when()).await; + + let state = match source + .fetch(fetch.project_key(), false, fetch.revision()) + .await + { + Ok(result) => result, + Err(err) => { + relay_log::error!( + tags.project_key = fetch.project_key().as_str(), + error = &err as &dyn std::error::Error, + "failed to fetch project from source: {fetch:?}" + ); + + // TODO: change this to ProjectState::Pending once we consider it safe to do so. + // see https://github.com/getsentry/relay/pull/4140. + ProjectState::Disabled.into() + } + }; + + let _ = project_updates.send(fetch.complete(state)); + }); + } +} + +/// All [`ProjectCacheService`] message handlers. +impl ProjectCacheService { + fn handle_fetch(&mut self, project_key: ProjectKey) { + if let Some(fetch) = self.store.try_begin_fetch(project_key, &self.config) { + self.schedule_fetch(fetch); + } + } + + fn handle_completed_fetch(&mut self, fetch: CompletedFetch) { + let project_key = fetch.project_key(); + + if let Some(fetch) = self.store.complete_fetch(fetch, &self.config) { + relay_log::trace!( + project_key = fetch.project_key().as_str(), + "re-scheduling project fetch: {fetch:?}" + ); + self.schedule_fetch(fetch); + return; + } + + let _ = self + .project_events_tx + .send(ProjectChange::Ready(project_key)); + + metric!( + gauge(RelayGauges::ProjectCacheNotificationChannel) = + self.project_events_tx.len() as u64 + ); + } + + fn handle_evict_stale_projects(&mut self) { + let on_evict = |project_key| { + let _ = self + .project_events_tx + .send(ProjectChange::Evicted(project_key)); + }; + + self.store.evict_stale_projects(&self.config, on_evict); + } + + fn handle_message(&mut self, message: ProjectCache) { + match message { + ProjectCache::Fetch(project_key) => self.handle_fetch(project_key), + } + } +} + +impl relay_system::Service for ProjectCacheService { + type Interface = ProjectCache; + + fn spawn_handler(mut self, mut rx: relay_system::Receiver) { + macro_rules! timed { + ($task:expr, $body:expr) => {{ + let task_name = $task; + metric!( + timer(RelayTimers::ProjectCacheTaskDuration), + task = task_name, + { $body } + ) + }}; + } + + tokio::spawn(async move { + let mut eviction_ticker = tokio::time::interval(self.config.cache_eviction_interval()); + + loop { + tokio::select! 
{ + biased; + + Some(update) = self.project_update_rx.recv() => timed!( + "project_update", + self.handle_completed_fetch(update) + ), + Some(message) = rx.recv() => timed!( + message.variant(), + self.handle_message(message) + ), + _ = eviction_ticker.tick() => timed!( + "evict_stale_projects", + self.handle_evict_stale_projects() + ), + } + } + }); + } +} diff --git a/relay-server/src/services/projects/cache/state.rs b/relay-server/src/services/projects/cache/state.rs new file mode 100644 index 0000000000..ada692cef0 --- /dev/null +++ b/relay-server/src/services/projects/cache/state.rs @@ -0,0 +1,769 @@ +use std::fmt; +use std::sync::Arc; +use tokio::time::Instant; + +use arc_swap::ArcSwap; +use relay_base_schema::project::ProjectKey; +use relay_config::Config; +use relay_quotas::CachedRateLimits; +use relay_sampling::evaluation::ReservoirCounters; +use relay_statsd::metric; + +use crate::services::projects::project::{ProjectState, Revision}; +use crate::services::projects::source::SourceProjectState; +use crate::statsd::{RelayCounters, RelayHistograms}; +use crate::utils::RetryBackoff; + +/// The backing storage for a project cache. +/// +/// Exposes the only interface to delete from [`Shared`], guaranteed by +/// requiring exclusive/mutable access to [`ProjectStore`]. +/// +/// [`Shared`] can be extended through [`Shared::get_or_create`], in which case +/// the private state is missing. Users of [`Shared::get_or_create`] *must* trigger +/// a fetch to create the private state and keep it updated. +/// This guarantees that eventually the project state is populated, but for a undetermined, +/// time it is possible that shared state exists without the respective private state. +#[derive(Default)] +pub struct ProjectStore { + /// The shared state, which can be accessed concurrently. + shared: Arc, + /// The private, mutably exclusive state, used to maintain the project state. + private: hashbrown::HashMap, +} + +impl ProjectStore { + /// Retrieves a [`Shared`] handle which can be freely shared with multiple consumers. + pub fn shared(&self) -> Arc { + Arc::clone(&self.shared) + } + + /// Tries to begin a new fetch for the passed `project_key`. + /// + /// Returns `None` if no fetch is necessary or there is already a fetch ongoing. + /// A returned [`Fetch`] must be scheduled and completed with [`Fetch::complete`] and + /// [`Self::complete_fetch`]. + pub fn try_begin_fetch(&mut self, project_key: ProjectKey, config: &Config) -> Option { + self.get_or_create(project_key, config) + .try_begin_fetch(config) + } + + /// Completes a [`CompletedFetch`] started with [`Self::try_begin_fetch`]. + /// + /// Returns a new [`Fetch`] if another fetch must be scheduled. This happens when the fetched + /// [`ProjectState`] is still pending or already deemed expired. + #[must_use = "an incomplete fetch must be retried"] + pub fn complete_fetch(&mut self, fetch: CompletedFetch, config: &Config) -> Option { + // Eviction is not possible for projects which are currently being fetched. + // Hence if there was a started fetch, the project state must always exist at this stage. + debug_assert!(self.shared.projects.pin().get(&fetch.project_key).is_some()); + debug_assert!(self.private.get(&fetch.project_key).is_some()); + + let mut project = self.get_or_create(fetch.project_key, config); + project.complete_fetch(fetch); + // Schedule another fetch if necessary, usually should only happen if + // the completed fetch is pending. 
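+        //
+        // `try_begin_fetch` returns `None` when the just completed fetch already
+        // brought the project up to date, in which case there is nothing to re-schedule.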
+ project.try_begin_fetch(config) + } + + /// Evicts all stale, expired projects from the cache. + /// + /// Evicted projects are passed to the `on_evict` callback. Returns the total amount of evicted + /// projects. + pub fn evict_stale_projects(&mut self, config: &Config, mut on_evict: F) -> usize + where + F: FnMut(ProjectKey), + { + let eviction_start = Instant::now(); + + let expired = self.private.extract_if(|_, private| { + // We only evict projects which have fully expired and are not currently being fetched. + // + // This relies on the fact that a project should never remain in the `pending` state + // for long and is either always being fetched or successfully fetched. + private.last_fetch().map_or(false, |ts| { + ts.check_expiry(eviction_start, config).is_expired() + }) + }); + + let mut evicted = 0; + + let shared = self.shared.projects.pin(); + for (project_key, _) in expired { + let _removed = shared.remove(&project_key); + debug_assert!( + _removed.is_some(), + "an expired project must exist in the shared state" + ); + relay_log::trace!(tags.project_key = project_key.as_str(), "project evicted"); + + evicted += 1; + + on_evict(project_key); + } + drop(shared); + + metric!( + histogram(RelayHistograms::ProjectStateCacheSize) = self.shared.projects.len() as u64, + storage = "shared" + ); + metric!( + histogram(RelayHistograms::ProjectStateCacheSize) = self.private.len() as u64, + storage = "private" + ); + metric!(counter(RelayCounters::EvictingStaleProjectCaches) += evicted as u64); + + evicted + } + + /// Get a reference to the current project or create a new project. + /// + /// For internal use only, a created project must always be fetched immediately. + fn get_or_create(&mut self, project_key: ProjectKey, config: &Config) -> ProjectRef<'_> { + #[cfg(debug_assertions)] + if self.private.contains_key(&project_key) { + // We have exclusive access to the private part, there are no concurrent deletions + // hence if we have a private state there must always be a shared state as well. + // + // The opposite is not true, the shared state may have been created concurrently + // through the shared access. + debug_assert!(self.shared.projects.pin().contains_key(&project_key)); + } + + let private = self + .private + .entry(project_key) + .and_modify(|_| { + metric!(counter(RelayCounters::ProjectCacheHit) += 1); + }) + .or_insert_with(|| { + metric!(counter(RelayCounters::ProjectCacheMiss) += 1); + PrivateProjectState::new(project_key, config) + }); + + let shared = self + .shared + .projects + .pin() + .get_or_insert_with(project_key, Default::default) + .clone(); + + ProjectRef { private, shared } + } +} + +/// The shared and concurrently accessible handle to the project cache. +#[derive(Default)] +pub struct Shared { + projects: papaya::HashMap, +} + +impl Shared { + /// Returns the existing project state or creates a new one. + /// + /// The caller must ensure that the project cache is instructed to + /// [`super::ProjectCache::Fetch`] the retrieved project. + pub fn get_or_create(&self, project_key: ProjectKey) -> SharedProject { + // The fast path, we expect the project to exist. + let projects = self.projects.pin(); + if let Some(project) = projects.get(&project_key) { + return project.to_shared_project(); + } + + // The slow path, try to attempt to insert, somebody else may have been faster, but that's okay. 
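+        //
+        // If this insert loses the race, `try_insert` hands back the entry created by
+        // the winner (`occupied.current`), which is returned instead.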
+ match projects.try_insert(project_key, Default::default()) { + Ok(inserted) => inserted.to_shared_project(), + Err(occupied) => occupied.current.to_shared_project(), + } + } +} + +/// TEST ONLY bypass to make the project cache mockable. +#[cfg(test)] +impl Shared { + /// Updates the project state for a project. + /// + /// TEST ONLY! + pub fn test_set_project_state(&self, project_key: ProjectKey, state: ProjectState) { + self.projects + .pin() + .get_or_insert_with(project_key, Default::default) + .set_project_state(state); + } + + /// Returns `true` if there exists a shared state for the passed `project_key`. + pub fn test_has_project_created(&self, project_key: ProjectKey) -> bool { + self.projects.pin().contains_key(&project_key) + } +} + +impl fmt::Debug for Shared { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Shared") + .field("num_projects", &self.projects.len()) + .finish() + } +} + +/// A single project from the [`Shared`] project cache. +pub struct SharedProject(Arc); + +impl SharedProject { + /// Returns a reference to the contained [`ProjectState`]. + pub fn project_state(&self) -> &ProjectState { + &self.0.state + } + + /// Returns a reference to the contained [`CachedRateLimits`]. + pub fn cached_rate_limits(&self) -> &CachedRateLimits { + // Exposing cached rate limits may be a bad idea, this allows mutation + // and caching of rate limits for pending projects, which may or may not be fine. + // Although, for now this is fine. + // + // Read only access is easily achievable if we return only the current rate limits. + &self.0.rate_limits + } + + /// Returns a reference to the contained [`ReservoirCounters`]. + pub fn reservoir_counters(&self) -> &ReservoirCounters { + &self.0.reservoir_counters + } +} + +/// TEST ONLY bypass to make the project cache mockable. +#[cfg(test)] +impl SharedProject { + /// Creates a new [`SharedProject`] for testing only. + pub fn for_test(state: ProjectState) -> Self { + Self(Arc::new(SharedProjectStateInner { + state, + ..Default::default() + })) + } +} + +/// Reference to a full project wrapping shared and private state. +struct ProjectRef<'a> { + shared: SharedProjectState, + private: &'a mut PrivateProjectState, +} + +impl ProjectRef<'_> { + fn try_begin_fetch(&mut self, config: &Config) -> Option { + let now = Instant::now(); + self.private + .try_begin_fetch(now, config) + .map(|fetch| fetch.with_revision(self.shared.revision())) + } + + fn complete_fetch(&mut self, fetch: CompletedFetch) { + let now = Instant::now(); + self.private.complete_fetch(&fetch, now); + + // Keep the old state around if the current fetch is pending. + // It may still be useful to callers. + match fetch.state { + SourceProjectState::New(state) if !state.is_pending() => { + self.shared.set_project_state(state); + } + _ => {} + } + } +} + +/// A [`Fetch`] token. +/// +/// When returned it must be executed and completed using [`Self::complete`]. +#[must_use = "a fetch must be executed"] +#[derive(Debug)] +pub struct Fetch { + project_key: ProjectKey, + when: Instant, + revision: Revision, +} + +impl Fetch { + /// Returns the [`ProjectKey`] of the project to fetch. + pub fn project_key(&self) -> ProjectKey { + self.project_key + } + + /// Returns when the fetch for the project should be scheduled. + /// + /// This can be now (as soon as possible) or a later point in time, if the project is currently + /// in a backoff. + pub fn when(&self) -> Instant { + self.when + } + + /// Returns the revisions of the currently cached project. 
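+    ///
+    /// [`Revision`] is reference counted internally, so cloning it here is cheap.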
+ /// + /// If the upstream indicates it does not have a different version of this project + /// we do not need to update the local state. + pub fn revision(&self) -> Revision { + self.revision.clone() + } + + /// Completes the fetch with a result and returns a [`CompletedFetch`]. + pub fn complete(self, state: SourceProjectState) -> CompletedFetch { + CompletedFetch { + project_key: self.project_key, + state, + } + } + + fn with_revision(mut self, revision: Revision) -> Self { + self.revision = revision; + self + } +} + +/// The result of an executed [`Fetch`]. +#[must_use = "a completed fetch must be acted upon"] +#[derive(Debug)] +pub struct CompletedFetch { + project_key: ProjectKey, + state: SourceProjectState, +} + +impl CompletedFetch { + /// Returns the [`ProjectKey`] of the project which was fetched. + pub fn project_key(&self) -> ProjectKey { + self.project_key + } + + /// Returns `true` if the fetch completed with a pending status. + fn is_pending(&self) -> bool { + match &self.state { + SourceProjectState::New(state) => state.is_pending(), + SourceProjectState::NotModified => false, + } + } +} + +/// The state of a project contained in the [`Shared`] project cache. +/// +/// This state is interior mutable and allows updates to the project. +#[derive(Debug, Default, Clone)] +struct SharedProjectState(Arc>); + +impl SharedProjectState { + /// Updates the project state. + fn set_project_state(&self, state: ProjectState) { + let prev = self.0.rcu(|stored| SharedProjectStateInner { + state: state.clone(), + rate_limits: Arc::clone(&stored.rate_limits), + reservoir_counters: Arc::clone(&stored.reservoir_counters), + }); + + // Try clean expired reservoir counters. + // + // We do it after the `rcu`, to not re-run this more often than necessary. + if let Some(state) = state.enabled() { + let config = state.config.sampling.as_ref(); + if let Some(config) = config.and_then(|eb| eb.as_ref().ok()) { + // We can safely use previous here, the `rcu` just replaced the state, the + // reservoir counters did not change. + // + // `try_lock` to not potentially block, it's a best effort cleanup. + if let Ok(mut counters) = prev.reservoir_counters.try_lock() { + counters.retain(|key, _| config.rules.iter().any(|rule| rule.id == *key)); + } + } + } + } + + /// Extracts and clones the revision from the contained project state. + fn revision(&self) -> Revision { + self.0.as_ref().load().state.revision().clone() + } + + /// Transforms this interior mutable handle to an immutable [`SharedProject`]. + fn to_shared_project(&self) -> SharedProject { + SharedProject(self.0.as_ref().load_full()) + } +} + +/// The data contained in a [`SharedProjectState`]. +/// +/// All fields must be cheap to clone and are ideally just a single `Arc`. +/// Partial updates to [`SharedProjectState`], are performed using `rcu` cloning all fields. +#[derive(Debug, Default)] +struct SharedProjectStateInner { + state: ProjectState, + rate_limits: Arc, + reservoir_counters: ReservoirCounters, +} + +/// Current fetch state for a project. +#[derive(Debug)] +enum FetchState { + /// There is a fetch currently in progress. + InProgress, + /// A successful fetch is pending. + /// + /// This state is essentially only the initial state, a project + /// for the most part should always have a fetch in progress or be + /// in the non-pending state. + Pending { + /// Time when the next fetch should be attempted. + /// + /// `None` means soon as possible. + next_fetch_attempt: Option, + }, + /// There was a successful non-pending fetch. 
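+    ///
+    /// This is the only state in which a project has a [`LastFetch`] and can
+    /// therefore expire and eventually be evicted.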
+ Complete { + /// Time when the fetch was completed. + last_fetch: LastFetch, + }, +} + +/// Contains all mutable state necessary to maintain the project cache. +struct PrivateProjectState { + /// Project key this state belongs to. + project_key: ProjectKey, + + /// The current fetch state. + state: FetchState, + /// The current backoff used for calculating the next fetch attempt. + /// + /// The backoff is reset after a successful, non-pending fetch. + backoff: RetryBackoff, +} + +impl PrivateProjectState { + fn new(project_key: ProjectKey, config: &Config) -> Self { + Self { + project_key, + state: FetchState::Pending { + next_fetch_attempt: None, + }, + backoff: RetryBackoff::new(config.http_max_retry_interval()), + } + } + + /// Returns the [`LastFetch`] if there is currently no fetch in progress and the project + /// was fetched successfully before. + fn last_fetch(&self) -> Option { + match &self.state { + FetchState::Complete { last_fetch } => Some(*last_fetch), + _ => None, + } + } + + fn try_begin_fetch(&mut self, now: Instant, config: &Config) -> Option { + let when = match &self.state { + FetchState::InProgress {} => { + relay_log::trace!( + tags.project_key = self.project_key.as_str(), + "project fetch skipped, fetch in progress" + ); + return None; + } + FetchState::Pending { next_fetch_attempt } => { + // Schedule a new fetch, even if there is a backoff, it will just be sleeping for a while. + next_fetch_attempt.unwrap_or(now) + } + FetchState::Complete { last_fetch } => { + if last_fetch.check_expiry(now, config).is_fresh() { + // The current state is up to date, no need to start another fetch. + relay_log::trace!( + tags.project_key = self.project_key.as_str(), + "project fetch skipped, already up to date" + ); + return None; + } + now + } + }; + + // Mark a current fetch in progress. + self.state = FetchState::InProgress {}; + + relay_log::trace!( + tags.project_key = &self.project_key.as_str(), + attempts = self.backoff.attempt() + 1, + "project state fetch scheduled in {:?}", + when.saturating_duration_since(Instant::now()), + ); + + Some(Fetch { + project_key: self.project_key, + when, + revision: Revision::default(), + }) + } + + fn complete_fetch(&mut self, fetch: &CompletedFetch, now: Instant) { + debug_assert!( + matches!(self.state, FetchState::InProgress), + "fetch completed while there was no current fetch registered" + ); + + if fetch.is_pending() { + self.state = FetchState::Pending { + next_fetch_attempt: now.checked_add(self.backoff.next_backoff()), + }; + relay_log::trace!( + tags.project_key = &self.project_key.as_str(), + "project state fetch completed but still pending" + ); + } else { + relay_log::trace!( + tags.project_key = &self.project_key.as_str(), + "project state fetch completed with non-pending config" + ); + self.backoff.reset(); + self.state = FetchState::Complete { + last_fetch: LastFetch(now), + }; + } + } +} + +/// New type containing the last successful fetch time as an [`Instant`]. +#[derive(Debug, Copy, Clone)] +struct LastFetch(Instant); + +impl LastFetch { + /// Returns the [`Expiry`] of the last fetch in relation to `now`. + fn check_expiry(&self, now: Instant, config: &Config) -> Expiry { + let expiry = config.project_cache_expiry(); + let elapsed = now.saturating_duration_since(self.0); + + if elapsed >= expiry + config.project_grace_period() { + Expiry::Expired + } else if elapsed >= expiry { + Expiry::Stale + } else { + Expiry::Fresh + } + } +} + +/// Expiry state of a project. 
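+///
+/// Computed by [`LastFetch::check_expiry`] from [`Config::project_cache_expiry`] and
+/// [`Config::project_grace_period`]: for example, with an expiry of 5 seconds and a
+/// grace period of 5 seconds, a project becomes `Stale` after 5 seconds and `Expired`
+/// after 10 seconds.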
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] +enum Expiry { + /// The project state is perfectly up to date. + Fresh, + /// The project state is outdated but events depending on this project state can still be + /// processed. The state should be refreshed in the background though. + Stale, + /// The project state is completely outdated and events need to be buffered up until the new + /// state has been fetched. + Expired, +} + +impl Expiry { + /// Returns `true` if the project is up-to-date and does not need to be fetched. + fn is_fresh(&self) -> bool { + matches!(self, Self::Fresh) + } + + /// Returns `true` if the project is expired and can be evicted. + fn is_expired(&self) -> bool { + matches!(self, Self::Expired) + } +} + +#[cfg(test)] +mod tests { + use std::time::Duration; + + use super::*; + + fn collect_evicted(store: &mut ProjectStore, config: &Config) -> Vec { + let mut evicted = Vec::new(); + let num_evicted = store.evict_stale_projects(config, |pk| evicted.push(pk)); + assert_eq!(evicted.len(), num_evicted); + evicted + } + + macro_rules! assert_state { + ($store:ident, $project_key:ident, $state:pat) => { + assert!(matches!( + $store.shared().get_or_create($project_key).project_state(), + $state + )); + }; + } + + #[test] + fn test_store_fetch() { + let project_key = ProjectKey::parse("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").unwrap(); + let mut store = ProjectStore::default(); + let config = Default::default(); + + let fetch = store.try_begin_fetch(project_key, &config).unwrap(); + assert_eq!(fetch.project_key(), project_key); + assert!(fetch.when() < Instant::now()); + assert_eq!(fetch.revision().as_str(), None); + assert_state!(store, project_key, ProjectState::Pending); + + // Fetch already in progress, nothing to do. + assert!(store.try_begin_fetch(project_key, &config).is_none()); + + // A pending fetch should trigger a new fetch immediately. + let fetch = fetch.complete(ProjectState::Pending.into()); + let fetch = store.complete_fetch(fetch, &config).unwrap(); + assert_eq!(fetch.project_key(), project_key); + // First backoff is still immediately. + assert!(fetch.when() < Instant::now()); + assert_eq!(fetch.revision().as_str(), None); + assert_state!(store, project_key, ProjectState::Pending); + + // Pending again. + let fetch = fetch.complete(ProjectState::Pending.into()); + let fetch = store.complete_fetch(fetch, &config).unwrap(); + assert_eq!(fetch.project_key(), project_key); + // This time it needs to be in the future (backoff). + assert!(fetch.when() > Instant::now()); + assert_eq!(fetch.revision().as_str(), None); + assert_state!(store, project_key, ProjectState::Pending); + + // Now complete with disabled. + let fetch = fetch.complete(ProjectState::Disabled.into()); + assert!(store.complete_fetch(fetch, &config).is_none()); + assert_state!(store, project_key, ProjectState::Disabled); + + // A new fetch is not yet necessary. 
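+        // The fetch just completed successfully, so the project is still fresh.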
+ assert!(store.try_begin_fetch(project_key, &config).is_none()); + } + + #[tokio::test(start_paused = true)] + async fn test_store_fetch_pending_does_not_replace_state() { + let project_key = ProjectKey::parse("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").unwrap(); + let mut store = ProjectStore::default(); + let config = Config::from_json_value(serde_json::json!({ + "cache": { + "project_expiry": 5, + "project_grace_period": 5, + } + })) + .unwrap(); + + let fetch = store.try_begin_fetch(project_key, &config).unwrap(); + let fetch = fetch.complete(ProjectState::Disabled.into()); + assert!(store.complete_fetch(fetch, &config).is_none()); + assert_state!(store, project_key, ProjectState::Disabled); + + tokio::time::advance(Duration::from_secs(6)).await; + + let fetch = store.try_begin_fetch(project_key, &config).unwrap(); + let fetch = fetch.complete(ProjectState::Pending.into()); + // We're returned a new fetch, because the current one completed pending. + let fetch = store.complete_fetch(fetch, &config).unwrap(); + // The old cached state is still available and not replaced. + assert_state!(store, project_key, ProjectState::Disabled); + + let fetch = fetch.complete(ProjectState::new_allowed().into()); + assert!(store.complete_fetch(fetch, &config).is_none()); + assert_state!(store, project_key, ProjectState::Enabled(_)); + } + + #[tokio::test(start_paused = true)] + async fn test_store_evict_projects() { + let project_key1 = ProjectKey::parse("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").unwrap(); + let project_key2 = ProjectKey::parse("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb").unwrap(); + let mut store = ProjectStore::default(); + let config = Config::from_json_value(serde_json::json!({ + "cache": { + "project_expiry": 5, + "project_grace_period": 0, + } + })) + .unwrap(); + + let fetch = store.try_begin_fetch(project_key1, &config).unwrap(); + let fetch = fetch.complete(ProjectState::Disabled.into()); + assert!(store.complete_fetch(fetch, &config).is_none()); + + assert_eq!(collect_evicted(&mut store, &config), Vec::new()); + assert_state!(store, project_key1, ProjectState::Disabled); + + // 3 seconds is not enough to expire any project. + tokio::time::advance(Duration::from_secs(3)).await; + + assert_eq!(collect_evicted(&mut store, &config), Vec::new()); + assert_state!(store, project_key1, ProjectState::Disabled); + + let fetch = store.try_begin_fetch(project_key2, &config).unwrap(); + let fetch = fetch.complete(ProjectState::Disabled.into()); + assert!(store.complete_fetch(fetch, &config).is_none()); + + // A total of 6 seconds should expire the first project. + tokio::time::advance(Duration::from_secs(3)).await; + + assert_eq!(collect_evicted(&mut store, &config), vec![project_key1]); + assert_state!(store, project_key1, ProjectState::Pending); + assert_state!(store, project_key2, ProjectState::Disabled); + } + + #[tokio::test(start_paused = true)] + async fn test_store_evict_projects_pending_not_expired() { + let project_key1 = ProjectKey::parse("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").unwrap(); + let project_key2 = ProjectKey::parse("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb").unwrap(); + let mut store = ProjectStore::default(); + let config = Config::from_json_value(serde_json::json!({ + "cache": { + "project_expiry": 5, + "project_grace_period": 0, + } + })) + .unwrap(); + + let fetch = store.try_begin_fetch(project_key1, &config).unwrap(); + // Create a new project in a pending state, but never fetch it, this should also never expire. 
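+        //
+        // Going through the shared handle only creates the shared entry; without a
+        // fetch there is no private state and therefore no `LastFetch` that could expire.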
+ store.shared().get_or_create(project_key2); + + tokio::time::advance(Duration::from_secs(6)).await; + + // No evictions, project is pending. + assert_eq!(collect_evicted(&mut store, &config), Vec::new()); + + // Complete the project. + let fetch = fetch.complete(ProjectState::Disabled.into()); + assert!(store.complete_fetch(fetch, &config).is_none()); + + // Still should not be evicted, because we do have 5 seconds to expire since completion. + assert_eq!(collect_evicted(&mut store, &config), Vec::new()); + tokio::time::advance(Duration::from_secs(4)).await; + assert_eq!(collect_evicted(&mut store, &config), Vec::new()); + assert_state!(store, project_key1, ProjectState::Disabled); + + // Just enough to expire the project. + tokio::time::advance(Duration::from_millis(1001)).await; + assert_eq!(collect_evicted(&mut store, &config), vec![project_key1]); + assert_state!(store, project_key1, ProjectState::Pending); + assert_state!(store, project_key2, ProjectState::Pending); + } + + #[tokio::test(start_paused = true)] + async fn test_store_evict_projects_stale() { + let project_key = ProjectKey::parse("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").unwrap(); + let mut store = ProjectStore::default(); + let config = Config::from_json_value(serde_json::json!({ + "cache": { + "project_expiry": 5, + "project_grace_period": 5, + } + })) + .unwrap(); + + let fetch = store.try_begin_fetch(project_key, &config).unwrap(); + let fetch = fetch.complete(ProjectState::Disabled.into()); + assert!(store.complete_fetch(fetch, &config).is_none()); + + // This is in the grace period, but not yet expired. + tokio::time::advance(Duration::from_millis(9500)).await; + + assert_eq!(collect_evicted(&mut store, &config), Vec::new()); + assert_state!(store, project_key, ProjectState::Disabled); + + // Now it's expired. + tokio::time::advance(Duration::from_secs(1)).await; + + assert_eq!(collect_evicted(&mut store, &config), vec![project_key]); + assert_state!(store, project_key, ProjectState::Pending); + } +} diff --git a/relay-server/src/services/projects/project/state/info.rs b/relay-server/src/services/projects/project/info.rs similarity index 86% rename from relay-server/src/services/projects/project/state/info.rs rename to relay-server/src/services/projects/project/info.rs index eab9b8fb62..d6fb7cfaab 100644 --- a/relay-server/src/services/projects/project/state/info.rs +++ b/relay-server/src/services/projects/project/info.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use chrono::{DateTime, Utc}; use relay_base_schema::project::{ProjectId, ProjectKey}; @@ -16,7 +18,6 @@ use url::Url; use crate::envelope::Envelope; use crate::extractors::RequestMeta; use crate::services::outcome::DiscardReason; -use crate::services::projects::project::PublicKeyConfig; /// Information about an enabled project. /// @@ -32,7 +33,7 @@ pub struct ProjectInfo { /// are faked locally. pub last_change: Option>, /// The revision id of the project config. - pub rev: Option, + pub rev: Revision, /// Indicates that the project is disabled. /// A container of known public keys in the project. /// @@ -57,7 +58,7 @@ pub struct ProjectInfo { pub struct LimitedProjectInfo { pub project_id: Option, pub last_change: Option>, - pub rev: Option, + pub rev: Revision, pub public_keys: SmallVec<[PublicKeyConfig; 1]>, pub slug: Option, #[serde(with = "LimitedProjectConfig")] @@ -187,9 +188,6 @@ impl ProjectInfo { /// This scoping amends `RequestMeta::get_partial_scoping` by adding organization and key info. 
/// The processor must fetch the full scoping before attempting to rate limit with partial /// scoping. - /// - /// To get the own scoping of this ProjectKey without amending request information, use - /// [`Project::scoping`](crate::services::projects::project::Project::scoping) instead. pub fn scope_request(&self, meta: &RequestMeta) -> Scoping { let mut scoping = meta.get_partial_scoping(); @@ -248,3 +246,51 @@ impl ProjectInfo { self.config.features.has(feature) } } + +/// Represents a public key received from the projectconfig endpoint. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct PublicKeyConfig { + /// Public part of key (random hash). + pub public_key: ProjectKey, + + /// The primary key of the DSN in Sentry's main database. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub numeric_id: Option, +} + +/// Represents a project info revision. +/// +/// A revision can be missing, a missing revision never compares equal +/// to any other revision. +/// +/// Revisions are internally reference counted and cheap to create. +#[derive(Debug, Default, Clone, Serialize, Deserialize)] +#[serde(transparent)] +pub struct Revision(Option>); + +impl Revision { + /// Returns the revision as a string reference. + /// + /// `None` is a revision that does not match any other revision, + /// not even another revision which is represented as `None`. + pub fn as_str(&self) -> Option<&str> { + self.0.as_deref() + } +} + +impl PartialEq for Revision { + fn eq(&self, other: &Self) -> bool { + match (&self.0, &other.0) { + (None, _) => false, + (_, None) => false, + (Some(left), Some(right)) => left == right, + } + } +} + +impl From<&str> for Revision { + fn from(value: &str) -> Self { + Self(Some(value.into())) + } +} diff --git a/relay-server/src/services/projects/project/mod.rs b/relay-server/src/services/projects/project/mod.rs index 460bff2659..5e23327370 100644 --- a/relay-server/src/services/projects/project/mod.rs +++ b/relay-server/src/services/projects/project/mod.rs @@ -1,753 +1,127 @@ +//! Types that represent the current project state. use std::sync::Arc; -use std::time::Duration; -use relay_base_schema::project::ProjectKey; -use relay_config::Config; -use relay_dynamic_config::{ErrorBoundary, Feature}; -use relay_metrics::Bucket; -use relay_quotas::{CachedRateLimits, DataCategory, MetricNamespaceScoping, RateLimits, Scoping}; -use relay_sampling::evaluation::ReservoirCounters; -use relay_statsd::metric; -use relay_system::{Addr, BroadcastChannel}; use serde::{Deserialize, Serialize}; -use tokio::time::Instant; - -use crate::envelope::ItemType; -use crate::services::metrics::{Aggregator, MergeBuckets}; -use crate::services::outcome::{DiscardReason, Outcome}; -use crate::services::processor::ProcessProjectMetrics; -use crate::services::projects::cache::{ - CheckedEnvelope, ProcessMetrics, ProjectCache, RequestUpdate, -}; -use crate::utils::{Enforcement, SeqCount}; - -use crate::statsd::RelayCounters; -use crate::utils::{CheckLimits, EnvelopeLimiter, ManagedEnvelope, RetryBackoff}; - -pub mod state; - -pub use state::{ - ExpiryState, LimitedParsedProjectState, ParsedProjectState, ProjectFetchState, ProjectInfo, - ProjectState, -}; - -/// Sender type for messages that respond with project states. -pub type ProjectSender = relay_system::BroadcastSender; - -/// Represents a public key received from the projectconfig endpoint. 
-#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct PublicKeyConfig { - /// Public part of key (random hash). - pub public_key: ProjectKey, - - /// The primary key of the DSN in Sentry's main database. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub numeric_id: Option, -} - -/// Channel used to respond to state requests (e.g. by project config endpoint). -#[derive(Debug)] -struct StateChannel { - inner: BroadcastChannel, - no_cache: bool, -} - -impl StateChannel { - pub fn new() -> Self { - Self { - inner: BroadcastChannel::new(), - no_cache: false, - } - } - - pub fn no_cache(&mut self, no_cache: bool) -> &mut Self { - self.no_cache = no_cache; - self - } -} - -#[derive(Debug)] -enum GetOrFetch<'a> { - Cached(ProjectState), - Scheduled(&'a mut StateChannel), -} -/// Structure representing organization and project configuration for a project key. -/// -/// This structure no longer uniquely identifies a project. Instead, it identifies a project key. -/// Projects can define multiple keys, in which case this structure is duplicated for each instance. -#[derive(Debug)] -pub struct Project { - backoff: RetryBackoff, - next_fetch_attempt: Option, - last_updated_at: Instant, - project_key: ProjectKey, - config: Arc, - state: ProjectFetchState, - state_channel: Option, - rate_limits: CachedRateLimits, - last_no_cache: Instant, - reservoir_counters: ReservoirCounters, +use relay_base_schema::project::ProjectKey; +use relay_quotas::Scoping; + +mod info; + +pub use self::info::*; + +/// Representation of a project's current state. +#[derive(Clone, Debug, Default)] +pub enum ProjectState { + /// A valid project that is not disabled. + Enabled(Arc), + /// A project that was marked as "gone" by the upstream. This variant does not expose + /// any other project information. + Disabled, + /// A project to which one of the following conditions apply: + /// - The project has not yet been fetched. + /// - The upstream returned "pending" for this project (see [`crate::services::projects::source::upstream`]). + /// - The upstream returned an unparsable project so we have to try again. + /// - The project has expired and must be treated as "has not been fetched". + #[default] + Pending, } -impl Project { - /// Creates a new `Project`. - pub fn new(key: ProjectKey, config: Arc) -> Self { - Project { - backoff: RetryBackoff::new(config.http_max_retry_interval()), - next_fetch_attempt: None, - last_updated_at: Instant::now(), - project_key: key, - state: ProjectFetchState::expired(), - state_channel: None, - rate_limits: CachedRateLimits::new(), - last_no_cache: Instant::now(), - reservoir_counters: Arc::default(), - config, - } - } - - /// Returns the [`ReservoirCounters`] for the project. - pub fn reservoir_counters(&self) -> ReservoirCounters { - self.reservoir_counters.clone() - } - - /// Returns the current [`ProjectState`] attached to the project. - pub fn current_state(&self) -> ProjectState { - self.state.current_state(&self.config) - } - - /// Returns the currently active cached rate limits. - pub fn current_rate_limits(&mut self) -> &RateLimits { - self.rate_limits.current_limits() - } - - /// If a reservoir rule is no longer in the sampling config, we will remove those counters. 
- fn remove_expired_reservoir_rules(&self) { - let Some(state) = self.current_state().enabled() else { - return; - }; - - let Some(ErrorBoundary::Ok(config)) = state.config.sampling.as_ref() else { - return; - }; - - // Using try_lock to not slow down the project cache service. - if let Ok(mut guard) = self.reservoir_counters.try_lock() { - guard.retain(|key, _| config.rules.iter().any(|rule| rule.id == *key)); - } - } - - pub fn merge_rate_limits(&mut self, rate_limits: RateLimits) { - self.rate_limits.merge(rate_limits); - } - - /// Returns the next attempt `Instant` if backoff is initiated, or None otherwise. - pub fn next_fetch_attempt(&self) -> Option { - self.next_fetch_attempt - } - - /// The last time the project state was updated - pub fn last_updated_at(&self) -> Instant { - self.last_updated_at - } - - /// Refresh the update time of the project in order to delay eviction. - /// - /// Called by the project cache when the project state is refreshed. - pub fn refresh_updated_timestamp(&mut self) { - self.last_updated_at = Instant::now(); - } - - /// Collects internal project state and assembles a [`ProcessProjectMetrics`] message. - pub fn process_metrics(&mut self, message: ProcessMetrics) -> ProcessProjectMetrics { - let project_state = self.current_state(); - let rate_limits = self.rate_limits.current_limits().clone(); - - ProcessProjectMetrics { - project_state, - rate_limits, - - data: message.data, - project_key: message.project_key, - source: message.source, - received_at: message.received_at, - sent_at: message.sent_at, - } - } - - /// Returns a list of buckets back to the aggregator. +impl ProjectState { + /// Project state for an unknown but allowed project. /// - /// This is used to return flushed buckets back to the aggregator if the project has not been - /// loaded at the time of flush. - /// - /// Buckets at this stage are expected to be validated already. - pub fn return_buckets(&self, aggregator: &Addr, buckets: Vec) { - aggregator.send(MergeBuckets::new( - self.project_key, - buckets.into_iter().collect(), - )); - } - - /// Returns `true` if backoff expired and new attempt can be triggered. - fn can_fetch(&self) -> bool { - self.next_fetch_attempt - .map(|next_attempt_at| next_attempt_at <= Instant::now()) - .unwrap_or(true) - } - - /// Triggers a debounced refresh of the project state. - /// - /// If the state is already being updated in the background, this method checks if the request - /// needs to be upgraded with the `no_cache` flag to ensure a more recent update. - fn fetch_state( - &mut self, - project_cache: Addr, - no_cache: bool, - ) -> &mut StateChannel { - // If there is a running request and we do not need to upgrade it to no_cache, or if the - // backoff is started and the new attempt is still somewhere in the future, skip - // scheduling a new fetch. - let should_fetch = !matches!(self.state_channel, Some(ref channel) if channel.no_cache || !no_cache) - && self.can_fetch(); - - let channel = self.state_channel.get_or_insert_with(StateChannel::new); - - if should_fetch { - channel.no_cache(no_cache); - let attempts = self.backoff.attempt() + 1; - relay_log::debug!( - "project {} state requested {attempts} times", - self.project_key - ); - project_cache.send(RequestUpdate { - project_key: self.project_key, - no_cache, - cached_state: self.state.clone(), - }); - } - - channel + /// This state is used for forwarding in Proxy mode. 
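+    ///
+    /// All fields of the contained [`ProjectInfo`] are left at their defaults: no
+    /// project id, no public keys, and a default config.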
+ pub fn new_allowed() -> Self { + Self::Enabled(Arc::new(ProjectInfo { + project_id: None, + last_change: None, + rev: Default::default(), + public_keys: Default::default(), + slug: None, + config: Default::default(), + organization_id: None, + })) } - fn get_or_fetch_state( - &mut self, - project_cache: Addr, - mut no_cache: bool, - ) -> GetOrFetch<'_> { - // count number of times we are looking for the project state - metric!(counter(RelayCounters::ProjectStateGet) += 1); - - // Allow at most 1 no_cache request per second. Gracefully degrade to cached requests. - if no_cache { - if self.last_no_cache.elapsed() < Duration::from_secs(1) { - no_cache = false; - } else { - metric!(counter(RelayCounters::ProjectStateNoCache) += 1); - self.last_no_cache = Instant::now(); - } - } - - let cached_state = match self.state.expiry_state(&self.config) { - // Never use the cached state if `no_cache` is set. - _ if no_cache => None, - // There is no project state that can be used, fetch a state and return it. - ExpiryState::Expired => None, - // The project is semi-outdated, fetch new state but return old one. - ExpiryState::Stale(state) => Some(state.clone()), - // The project is not outdated, return early here to jump over fetching logic below. - ExpiryState::Updated(state) => return GetOrFetch::Cached(state.clone()), - }; - - let channel = self.fetch_state(project_cache, no_cache); - - match cached_state { - Some(state) => GetOrFetch::Cached(state), - None => GetOrFetch::Scheduled(channel), + /// Runs a post-deserialization step to normalize the project config (e.g. legacy fields). + pub fn sanitized(self) -> Self { + match self { + Self::Enabled(state) => Self::Enabled(Arc::new(state.as_ref().clone().sanitized())), + Self::Disabled => Self::Disabled, + Self::Pending => Self::Pending, } } - /// Returns the cached project state if it is valid. - /// - /// Depending on the state of the cache, this method takes different action: - /// - /// - If the cached state is up-to-date, this method simply returns `Some`. - /// - If the cached state is stale, this method triggers a refresh in the background and - /// returns `Some`. The stale period can be configured through - /// [`Config::project_grace_period`]. - /// - If there is no cached state or the cached state is fully outdated, this method triggers a - /// refresh in the background and returns `None`. - /// - /// If `no_cache` is set to true, this method always returns `None` and always triggers a - /// background refresh. - /// - /// To wait for a valid state instead, use [`get_state`](Self::get_state). - pub fn get_cached_state( - &mut self, - project_cache: Addr, - no_cache: bool, - ) -> ProjectState { - match self.get_or_fetch_state(project_cache, no_cache) { - GetOrFetch::Cached(state) => state, - GetOrFetch::Scheduled(_) => ProjectState::Pending, - } + /// Whether or not this state is pending. + pub fn is_pending(&self) -> bool { + matches!(self, Self::Pending) } - /// Obtains a valid project state and passes it to the sender once ready. - /// - /// This first checks if the state needs to be updated. This is the case if the project state - /// has passed its cache timeout. The `no_cache` flag forces an update. This does nothing if an - /// update is already running in the background. - /// - /// Independent of updating, _stale_ states are passed to the sender immediately as long as they - /// are in the [grace period](Config::project_grace_period). 
- pub fn get_state( - &mut self, - project_cache: Addr, - sender: ProjectSender, - no_cache: bool, - ) { - match self.get_or_fetch_state(project_cache, no_cache) { - GetOrFetch::Cached(state) => { - sender.send(state); - } - - GetOrFetch::Scheduled(channel) => { - channel.inner.attach(sender); - } + /// Utility function that returns the project config if enabled. + pub fn enabled(self) -> Option> { + match self { + Self::Enabled(info) => Some(info), + Self::Disabled | Self::Pending => None, } } - /// Ensures the project state gets updated. - /// - /// This first checks if the state needs to be updated. This is the case if the project state - /// has passed its cache timeout. The `no_cache` flag forces another update unless one is - /// already running in the background. - /// - /// If an update is required, the update will start in the background and complete at a later - /// point. Therefore, this method is useful to trigger an update early if it is already clear - /// that the project state will be needed soon. To retrieve an updated state, use - /// [`Project::get_state`] instead. - pub fn prefetch(&mut self, project_cache: Addr, no_cache: bool) -> &mut Self { - self.get_cached_state(project_cache, no_cache); - self - } - - /// Replaces the internal project state with a new one and triggers pending actions. - /// - /// Returns the *old* project state if it was replaced. + /// Returns the revision of the contained project info. /// - /// This flushes pending envelopes from [`ValidateEnvelope`] and - /// notifies all pending receivers from [`get_state`](Self::get_state). - /// - /// `no_cache` should be passed from the requesting call. Updates with `no_cache` will always - /// take precedence. - /// - /// [`ValidateEnvelope`]: crate::services::projects::cache::ValidateEnvelope - pub fn update_state( - &mut self, - project_cache: &Addr, - state: ProjectFetchState, - no_cache: bool, - ) -> Option { - // Initiate the backoff if the incoming state is invalid. Reset it otherwise. - if state.is_pending() { - self.next_fetch_attempt = Instant::now().checked_add(self.backoff.next_backoff()); - } else { - self.next_fetch_attempt = None; - self.backoff.reset(); - } - - let Some(channel) = self.state_channel.take() else { - relay_log::error!(tags.project_key = %self.project_key, "channel is missing for the state update"); - return None; - }; - - // If the channel has `no_cache` set but we are not a `no_cache` request, we have - // been superseeded. Put it back and let the other request take precedence. - if channel.no_cache && !no_cache { - self.state_channel = Some(channel); - return None; + /// `None` if the revision is missing or not available. + pub fn revision(&self) -> Revision { + match &self { + Self::Enabled(info) => info.rev.clone(), + Self::Disabled | Self::Pending => Revision::default(), } - - // If the state is pending, return back the taken channel and schedule state update. 
- if state.is_pending() { - // Only overwrite if the old state is expired: - let is_expired = matches!(self.state.expiry_state(&self.config), ExpiryState::Expired); - let old_state = match is_expired { - true => Some(std::mem::replace(&mut self.state, state)), - false => None, - }; - - self.state_channel = Some(channel); - let attempts = self.backoff.attempt() + 1; - relay_log::debug!( - "project {} state requested {attempts} times", - self.project_key - ); - - project_cache.send(RequestUpdate { - project_key: self.project_key, - no_cache, - cached_state: self.state.clone(), - }); - return old_state; - } - - let old_state = std::mem::replace(&mut self.state, state); - - // Flush all waiting recipients. - relay_log::debug!("project state {} updated", self.project_key); - channel.inner.send(self.state.current_state(&self.config)); - - self.after_state_updated(); - - Some(old_state) - } - - /// Called after all state validations and after the project state is updated. - /// - /// See also: [`Self::update_state`]. - fn after_state_updated(&mut self) { - // Check if the new sampling config got rid of any reservoir rules we have counters for. - self.remove_expired_reservoir_rules(); } /// Creates `Scoping` for this project if the state is loaded. /// /// Returns `Some` if the project state has been fetched and contains a project identifier, /// otherwise `None`. - pub fn scoping(&self) -> Option { - self.current_state().scoping(self.project_key) - } - - /// Runs the checks on incoming envelopes. - /// - /// See, [`crate::services::projects::cache::CheckEnvelope`] for more information - /// - /// * checks the rate limits - /// * validates the envelope meta in `check_request` - determines whether the given request - /// should be accepted or discarded - /// - /// IMPORTANT: If the [`ProjectState`] is invalid, the `check_request` will be skipped and only - /// rate limits will be validated. This function **must not** be called in the main processing - /// pipeline. - pub fn check_envelope( - &mut self, - mut envelope: ManagedEnvelope, - ) -> Result { - let state = match self.current_state() { - ProjectState::Enabled(state) => Some(state.clone()), - ProjectState::Disabled => { - // TODO(jjbayer): We should refactor this function to either return a Result or - // handle envelope rejections internally, but not both. - envelope.reject(Outcome::Invalid(DiscardReason::ProjectId)); - return Err(DiscardReason::ProjectId); - } - ProjectState::Pending => None, - }; - let mut scoping = envelope.scoping(); - - if let Some(ref state) = state { - scoping = state.scope_request(envelope.envelope().meta()); - envelope.scope(scoping); - - if let Err(reason) = state.check_envelope(envelope.envelope(), &self.config) { - envelope.reject(Outcome::Invalid(reason)); - return Err(reason); - } - } - - let current_limits = self.rate_limits.current_limits(); - - let quotas = state.as_deref().map(|s| s.get_quotas()).unwrap_or(&[]); - let envelope_limiter = EnvelopeLimiter::new(CheckLimits::NonIndexed, |item_scoping, _| { - Ok(current_limits.check_with_quotas(quotas, item_scoping)) - }); - - let (mut enforcement, mut rate_limits) = - envelope_limiter.compute(envelope.envelope_mut(), &scoping)?; - - let check_nested_spans = state - .as_ref() - .is_some_and(|s| s.has_feature(Feature::ExtractSpansFromEvent)); - - // If we can extract spans from the event, we want to try and count the number of nested - // spans to correctly emit negative outcomes in case the transaction itself is dropped. 
- if check_nested_spans { - sync_spans_to_enforcement(&envelope, &mut enforcement); + pub fn scoping(&self, project_key: ProjectKey) -> Option { + match self { + Self::Enabled(info) => info.scoping(project_key), + _ => None, } - - enforcement.apply_with_outcomes(&mut envelope); - - envelope.update(); - - // Special case: Expose active rate limits for all metric namespaces if there is at least - // one metrics item in the Envelope to communicate backoff to SDKs. This is necessary - // because `EnvelopeLimiter` cannot not check metrics without parsing item contents. - if envelope.envelope().items().any(|i| i.ty().is_metrics()) { - let mut metrics_scoping = scoping.item(DataCategory::MetricBucket); - metrics_scoping.namespace = MetricNamespaceScoping::Any; - rate_limits.merge(current_limits.check_with_quotas(quotas, metrics_scoping)); - } - - let envelope = if envelope.envelope().is_empty() { - // Individual rate limits have already been issued above - envelope.reject(Outcome::RateLimited(None)); - None - } else { - Some(envelope) - }; - - Ok(CheckedEnvelope { - envelope, - rate_limits, - }) } } -/// Adds category limits for the nested spans inside a transaction. -/// -/// On the fast path of rate limiting, we do not have nested spans of a transaction extracted -/// as top-level spans, thus if we limited a transaction, we want to count and emit negative -/// outcomes for each of the spans nested inside that transaction. -fn sync_spans_to_enforcement(envelope: &ManagedEnvelope, enforcement: &mut Enforcement) { - if !enforcement.is_event_active() { - return; - } - - let spans_count = count_nested_spans(envelope); - if spans_count == 0 { - return; - } - - if enforcement.event.is_active() { - enforcement.spans = enforcement.event.clone_for(DataCategory::Span, spans_count); - } - - if enforcement.event_indexed.is_active() { - enforcement.spans_indexed = enforcement - .event_indexed - .clone_for(DataCategory::SpanIndexed, spans_count); +impl From for ProjectState { + fn from(value: ParsedProjectState) -> Self { + let ParsedProjectState { disabled, info } = value; + match disabled { + true => Self::Disabled, + false => Self::Enabled(Arc::new(info)), + } } } -/// Counts the nested spans inside the first transaction envelope item inside the [`Envelope`](crate::envelope::Envelope). -fn count_nested_spans(envelope: &ManagedEnvelope) -> usize { - #[derive(Debug, Deserialize)] - struct PartialEvent { - spans: SeqCount, - } - - envelope - .envelope() - .items() - .find(|item| *item.ty() == ItemType::Transaction && !item.spans_extracted()) - .and_then(|item| serde_json::from_slice::(&item.payload()).ok()) - // We do + 1, since we count the transaction itself because it will be extracted - // as a span and counted during the slow path of rate limiting. - .map_or(0, |event| event.spans.0 + 1) +/// Project state as used in serialization / deserialization. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ParsedProjectState { + /// Whether the project state is disabled. + #[serde(default)] + pub disabled: bool, + /// Project info. + /// + /// This contains no information when `disabled` is `true`, except for + /// public keys in static project configs (see [`crate::services::projects::source::local`]). 
+ #[serde(flatten)] + pub info: ProjectInfo, } -#[cfg(test)] -mod tests { - use crate::envelope::{ContentType, Envelope, Item}; - use crate::extractors::RequestMeta; - use crate::services::processor::ProcessingGroup; - use relay_base_schema::project::ProjectId; - use relay_event_schema::protocol::EventId; - use relay_test::mock_service; - use serde_json::json; - use smallvec::SmallVec; - - use super::*; - - #[test] - fn get_state_expired() { - for expiry in [9999, 0] { - let config = Arc::new( - Config::from_json_value(json!( - { - "cache": { - "project_expiry": expiry, - "project_grace_period": 0, - "eviction_interval": 9999 // do not evict - } - } - )) - .unwrap(), - ); - - // Initialize project with a state - let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); - let project_info = ProjectInfo { - project_id: Some(ProjectId::new(123)), - ..Default::default() - }; - let mut project = Project::new(project_key, config.clone()); - project.state = ProjectFetchState::enabled(project_info); - - if expiry > 0 { - // With long expiry, should get a state - assert!(matches!(project.current_state(), ProjectState::Enabled(_))); - } else { - // With 0 expiry, project should expire immediately. No state can be set. - assert!(matches!(project.current_state(), ProjectState::Pending)); - } - } - } - - #[tokio::test] - async fn test_stale_cache() { - let (addr, _) = mock_service("project_cache", (), |&mut (), _| {}); - - let config = Arc::new( - Config::from_json_value(json!( - { - "cache": { - "project_expiry": 100, - "project_grace_period": 0, - "eviction_interval": 9999 // do not evict - } - } - )) - .unwrap(), - ); - - let channel = StateChannel::new(); - - // Initialize project with a state. - let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); - let mut project = Project::new(project_key, config); - project.state_channel = Some(channel); - project.state = ProjectFetchState::allowed(); - - assert!(project.next_fetch_attempt.is_none()); - // Try to update project with errored project state. - project.update_state(&addr, ProjectFetchState::pending(), false); - // Since we got invalid project state we still keep the old one meaning there - // still must be the project id set. - assert!(matches!(project.current_state(), ProjectState::Enabled(_))); - assert!(project.next_fetch_attempt.is_some()); - - // This tests that we actually initiate the backoff and the backoff mechanism works: - // * first call to `update_state` with invalid ProjectState starts the backoff, but since - // it's the first attemt, we get Duration of 0. 
- // * second call to `update_state` here will bumpt the `next_backoff` Duration to somehing - // like ~ 1s - // * and now, by calling `fetch_state` we test that it's a noop, since if backoff is active - // we should never fetch - // * without backoff it would just panic, not able to call the ProjectCache service - let channel = StateChannel::new(); - project.state_channel = Some(channel); - project.update_state(&addr, ProjectFetchState::pending(), false); - project.fetch_state(addr, false); - } - - fn create_project(config: Option) -> Project { - let project_key = ProjectKey::parse("e12d836b15bb49d7bbf99e64295d995b").unwrap(); - let mut project = Project::new(project_key, Arc::new(Config::default())); - let mut project_info = ProjectInfo { - project_id: Some(ProjectId::new(42)), - ..Default::default() - }; - let mut public_keys = SmallVec::new(); - public_keys.push(PublicKeyConfig { - public_key: project_key, - numeric_id: None, - }); - project_info.public_keys = public_keys; - if let Some(config) = config { - project_info.config = serde_json::from_value(config).unwrap(); - } - project.state = ProjectFetchState::enabled(project_info); - project - } - - fn request_meta() -> RequestMeta { - let dsn = "https://e12d836b15bb49d7bbf99e64295d995b:@sentry.io/42" - .parse() - .unwrap(); - - RequestMeta::new(dsn) - } - - #[test] - fn test_track_nested_spans_outcomes() { - let mut project = create_project(Some(json!({ - "features": [ - "organizations:indexed-spans-extraction" - ], - "quotas": [{ - "id": "foo", - "categories": ["transaction"], - "window": 3600, - "limit": 0, - "reasonCode": "foo", - }] - }))); - - let mut envelope = Envelope::from_request(Some(EventId::new()), request_meta()); - - let mut transaction = Item::new(ItemType::Transaction); - transaction.set_payload( - ContentType::Json, - r#"{ - "event_id": "52df9022835246eeb317dbd739ccd059", - "type": "transaction", - "transaction": "I have a stale timestamp, but I'm recent!", - "start_timestamp": 1, - "timestamp": 2, - "contexts": { - "trace": { - "trace_id": "ff62a8b040f340bda5d830223def1d81", - "span_id": "bd429c44b67a3eb4" - } - }, - "spans": [ - { - "span_id": "bd429c44b67a3eb4", - "start_timestamp": 1, - "timestamp": null, - "trace_id": "ff62a8b040f340bda5d830223def1d81" - }, - { - "span_id": "bd429c44b67a3eb5", - "start_timestamp": 1, - "timestamp": null, - "trace_id": "ff62a8b040f340bda5d830223def1d81" - } - ] -}"#, - ); - - envelope.add_item(transaction); - - let (outcome_aggregator, mut outcome_aggregator_rx) = Addr::custom(); - let (test_store, _) = Addr::custom(); - - let managed_envelope = ManagedEnvelope::new( - envelope, - outcome_aggregator.clone(), - test_store, - ProcessingGroup::Transaction, - ); - - let _ = project.check_envelope(managed_envelope); - drop(outcome_aggregator); - - let expected = [ - (DataCategory::Transaction, 1), - (DataCategory::TransactionIndexed, 1), - (DataCategory::Span, 3), - (DataCategory::SpanIndexed, 3), - ]; - - for (expected_category, expected_quantity) in expected { - let outcome = outcome_aggregator_rx.blocking_recv().unwrap(); - assert_eq!(outcome.category, expected_category); - assert_eq!(outcome.quantity, expected_quantity); - } - } +/// Limited project state for external Relays. +#[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase", remote = "ParsedProjectState")] +pub struct LimitedParsedProjectState { + /// Whether the project state is disabled. + pub disabled: bool, + /// Limited project info for external Relays. 
+ /// + /// This contains no information when `disabled` is `true`, except for + /// public keys in static project configs (see [`crate::services::projects::source::local`]). + #[serde(with = "LimitedProjectInfo")] + #[serde(flatten)] + pub info: ProjectInfo, } diff --git a/relay-server/src/services/projects/project/state/fetch_state.rs b/relay-server/src/services/projects/project/state/fetch_state.rs deleted file mode 100644 index 020150f303..0000000000 --- a/relay-server/src/services/projects/project/state/fetch_state.rs +++ /dev/null @@ -1,162 +0,0 @@ -use std::sync::Arc; - -use tokio::time::Instant; - -use relay_config::Config; -use relay_dynamic_config::ProjectConfig; - -use crate::services::projects::project::state::info::ProjectInfo; -use crate::services::projects::project::ProjectState; - -/// Hides a cached project state and only exposes it if it has not expired. -#[derive(Clone, Debug)] -pub struct ProjectFetchState { - /// The time at which this project state was last updated. - last_fetch: Option, - state: ProjectState, -} - -impl ProjectFetchState { - /// Takes a [`ProjectState`] and sets it's last fetch to the current time. - pub fn new(state: ProjectState) -> Self { - Self { - last_fetch: Some(Instant::now()), - state, - } - } - - /// Refreshes the expiry of the fetch state. - pub fn refresh(old: ProjectFetchState) -> Self { - Self { - last_fetch: Some(Instant::now()), - state: old.state, - } - } - - /// Project state for an unknown but allowed project. - /// - /// This state is used for forwarding in Proxy mode. - pub fn allowed() -> Self { - Self::enabled(ProjectInfo { - project_id: None, - last_change: None, - rev: None, - public_keys: Default::default(), - slug: None, - config: ProjectConfig::default(), - organization_id: None, - }) - } - - /// An enabled project state created from a project info. - pub fn enabled(project_info: ProjectInfo) -> Self { - Self::new(ProjectState::Enabled(Arc::new(project_info))) - } - - // Returns a disabled state. - pub fn disabled() -> Self { - Self::new(ProjectState::Disabled) - } - - /// Returns a pending or invalid state. - pub fn pending() -> Self { - Self::new(ProjectState::Pending) - } - - /// Create a config that immediately counts as expired. - /// - /// This is what [`Project`](crate::services::projects::project::Project) initializes itself with. - pub fn expired() -> Self { - Self { - // Make sure the state immediately qualifies as expired: - last_fetch: None, - state: ProjectState::Pending, - } - } - - /// Sanitizes the contained project state. See [`ProjectState::sanitized`]. - pub fn sanitized(self) -> Self { - let Self { last_fetch, state } = self; - Self { - last_fetch, - state: state.sanitized(), - } - } - - /// Returns `true` if the contained state is pending. - pub fn is_pending(&self) -> bool { - matches!(self.state, ProjectState::Pending) - } - - /// Returns information about the expiry of a project state. - /// - /// If no detailed information is needed, use [`Self::current_state`] instead. - pub fn expiry_state(&self, config: &Config) -> ExpiryState { - match self.check_expiry(config) { - Expiry::Updated => ExpiryState::Updated(&self.state), - Expiry::Stale => ExpiryState::Stale(&self.state), - Expiry::Expired => ExpiryState::Expired, - } - } - - /// Returns the current project state, if it has not yet expired. 
- pub fn current_state(&self, config: &Config) -> ProjectState { - match self.expiry_state(config) { - ExpiryState::Updated(state) | ExpiryState::Stale(state) => state.clone(), - ExpiryState::Expired => ProjectState::Pending, - } - } - - /// Returns whether this state is outdated and needs to be refetched. - fn check_expiry(&self, config: &Config) -> Expiry { - let Some(last_fetch) = self.last_fetch else { - return Expiry::Expired; - }; - let expiry = match &self.state { - ProjectState::Enabled(info) if info.project_id.is_some() => { - config.project_cache_expiry() - } - _ => config.cache_miss_expiry(), - }; - - let elapsed = last_fetch.elapsed(); - if elapsed >= expiry + config.project_grace_period() { - Expiry::Expired - } else if elapsed >= expiry { - Expiry::Stale - } else { - Expiry::Updated - } - } - - /// Returns the revision of the contained project state. - /// - /// See: [`ProjectState::revision`]. - pub fn revision(&self) -> Option<&str> { - self.state.revision() - } -} - -/// Wrapper for a project state, with expiry information. -#[derive(Clone, Copy, Debug)] -pub enum ExpiryState<'a> { - /// An up-to-date project state. See [`Expiry::Updated`]. - Updated(&'a ProjectState), - /// A stale project state that can still be used. See [`Expiry::Stale`]. - Stale(&'a ProjectState), - /// An expired project state that should not be used. See [`Expiry::Expired`]. - Expired, -} - -/// The expiry status of a project state. Return value of [`ProjectFetchState::check_expiry`]. -#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] -enum Expiry { - /// The project state is perfectly up to date. - Updated, - /// The project state is outdated but events depending on this project state can still be - /// processed. The state should be refreshed in the background though. - Stale, - /// The project state is completely outdated and events need to be buffered up until the new - /// state has been fetched. - Expired, -} diff --git a/relay-server/src/services/projects/project/state/mod.rs b/relay-server/src/services/projects/project/state/mod.rs deleted file mode 100644 index 868ad5ed4a..0000000000 --- a/relay-server/src/services/projects/project/state/mod.rs +++ /dev/null @@ -1,126 +0,0 @@ -//! Types that represent the current project state. -use std::sync::Arc; - -use serde::{Deserialize, Serialize}; - -use relay_base_schema::project::ProjectKey; -use relay_quotas::Scoping; - -mod fetch_state; -mod info; - -pub use self::fetch_state::{ExpiryState, ProjectFetchState}; -pub use self::info::{LimitedProjectInfo, ProjectInfo}; - -/// Representation of a project's current state. -#[derive(Clone, Debug)] -pub enum ProjectState { - /// A valid project that is not disabled. - Enabled(Arc), - /// A project that was marked as "gone" by the upstream. This variant does not expose - /// any other project information. - Disabled, - /// A project to which one of the following conditions apply: - /// - The project has not yet been fetched. - /// - The upstream returned "pending" for this project (see [`crate::services::projects::source::upstream`]). - /// - The upstream returned an unparsable project so we have to try again. - /// - The project has expired and must be treated as "has not been fetched". - Pending, -} - -impl ProjectState { - /// Runs a post-deserialization step to normalize the project config (e.g. legacy fields). 
- pub fn sanitized(self) -> Self { - match self { - ProjectState::Enabled(state) => { - ProjectState::Enabled(Arc::new(state.as_ref().clone().sanitized())) - } - ProjectState::Disabled => ProjectState::Disabled, - ProjectState::Pending => ProjectState::Pending, - } - } - - /// Whether or not this state is pending. - pub fn is_pending(&self) -> bool { - matches!(self, ProjectState::Pending) - } - - /// Utility function that returns the project config if enabled. - pub fn enabled(self) -> Option> { - match self { - ProjectState::Enabled(info) => Some(info), - ProjectState::Disabled | ProjectState::Pending => None, - } - } - - /// Returns the revision of the contained project info. - /// - /// `None` if the revision is missing or not available. - pub fn revision(&self) -> Option<&str> { - match &self { - ProjectState::Enabled(info) => info.rev.as_deref(), - ProjectState::Disabled => None, - ProjectState::Pending => None, - } - } - - /// Creates `Scoping` for this project if the state is loaded. - /// - /// Returns `Some` if the project state has been fetched and contains a project identifier, - /// otherwise `None`. - pub fn scoping(&self, project_key: ProjectKey) -> Option { - match self { - Self::Enabled(info) => info.scoping(project_key), - _ => None, - } - } -} - -impl From for ProjectState { - fn from(value: ParsedProjectState) -> Self { - let ParsedProjectState { disabled, info } = value; - match disabled { - true => ProjectState::Disabled, - false => ProjectState::Enabled(Arc::new(info)), - } - } -} - -/// Project state as used in serialization / deserialization. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ParsedProjectState { - /// Whether the project state is disabled. - #[serde(default)] - pub disabled: bool, - /// Project info. - /// - /// This contains no information when `disabled` is `true`, except for - /// public keys in static project configs (see [`crate::services::projects::source::local`]). - #[serde(flatten)] - pub info: ProjectInfo, -} - -/// Limited project state for external Relays. -#[derive(Debug, Clone, Serialize)] -#[serde(rename_all = "camelCase", remote = "ParsedProjectState")] -pub struct LimitedParsedProjectState { - /// Whether the project state is disabled. - pub disabled: bool, - /// Limited project info for external Relays. - /// - /// This contains no information when `disabled` is `true`, except for - /// public keys in static project configs (see [`crate::services::projects::source::local`]). - #[serde(with = "LimitedProjectInfo")] - #[serde(flatten)] - pub info: ProjectInfo, -} - -/// Response indicating whether a project state needs to be updated -/// or the upstream does not have a newer version. -#[derive(Debug, Clone)] -pub enum UpstreamProjectState { - /// The upstream sent a [`ProjectState`]. - New(ProjectState), - /// The upstream indicated that there is no newer version of the state available. 
- NotModified, -} diff --git a/relay-server/src/services/projects/source/mod.rs b/relay-server/src/services/projects/source/mod.rs index 5b208ef3ae..1311acfc40 100644 --- a/relay-server/src/services/projects/source/mod.rs +++ b/relay-server/src/services/projects/source/mod.rs @@ -15,8 +15,7 @@ pub mod local; pub mod redis; pub mod upstream; -use crate::services::projects::project::state::UpstreamProjectState; -use crate::services::projects::project::ProjectFetchState; +use crate::services::projects::project::{ProjectState, Revision}; use crate::services::upstream::UpstreamRelay; use self::local::{LocalProjectSource, LocalProjectSourceService}; @@ -91,53 +90,55 @@ impl ProjectSource { } } + /// Fetches a project with `project_key` from the configured sources. + /// + /// Returns a fully sanitized project. pub async fn fetch( self, project_key: ProjectKey, no_cache: bool, - cached_state: ProjectFetchState, - ) -> Result { + current_revision: Revision, + ) -> Result { let state_opt = self .local_source .send(FetchOptionalProjectState { project_key }) .await?; if let Some(state) = state_opt { - return Ok(ProjectFetchState::new(state)); + return Ok(state.into()); } match self.config.relay_mode() { - RelayMode::Proxy => return Ok(ProjectFetchState::allowed()), - RelayMode::Static => return Ok(ProjectFetchState::disabled()), - RelayMode::Capture => return Ok(ProjectFetchState::allowed()), + RelayMode::Proxy => return Ok(ProjectState::new_allowed().into()), + RelayMode::Static => return Ok(ProjectState::Disabled.into()), + RelayMode::Capture => return Ok(ProjectState::new_allowed().into()), RelayMode::Managed => (), // Proceed with loading the config from redis or upstream } - let current_revision = cached_state.revision().map(String::from); #[cfg(feature = "processing")] if let Some(redis_source) = self.redis_source { let current_revision = current_revision.clone(); let redis_permit = self.redis_semaphore.acquire().await?; let state_fetch_result = tokio::task::spawn_blocking(move || { - redis_source.get_config_if_changed(project_key, current_revision.as_deref()) + redis_source.get_config_if_changed(project_key, current_revision) }) .await?; drop(redis_permit); match state_fetch_result { // New state fetched from Redis, possibly pending. - Ok(UpstreamProjectState::New(state)) => { + // + // If it is pending, we must fallback to fetching from the upstream. + Ok(SourceProjectState::New(state)) => { let state = state.sanitized(); if !state.is_pending() { - return Ok(ProjectFetchState::new(state)); + return Ok(state.into()); } } // Redis reported that we're holding an up-to-date version of the state already, // refresh the state and return the old cached state again. 
- Ok(UpstreamProjectState::NotModified) => { - return Ok(ProjectFetchState::refresh(cached_state)) - } + Ok(SourceProjectState::NotModified) => return Ok(SourceProjectState::NotModified), Err(error) => { relay_log::error!( error = &error as &dyn std::error::Error, @@ -156,10 +157,10 @@ impl ProjectSource { }) .await?; - match state { - UpstreamProjectState::New(state) => Ok(ProjectFetchState::new(state.sanitized())), - UpstreamProjectState::NotModified => Ok(ProjectFetchState::refresh(cached_state)), - } + Ok(match state { + SourceProjectState::New(state) => SourceProjectState::New(state.sanitized()), + SourceProjectState::NotModified => SourceProjectState::NotModified, + }) } } @@ -189,10 +190,7 @@ pub struct FetchProjectState { /// The upstream is allowed to omit full project configs /// for requests for which the requester already has the most /// recent revision. - /// - /// Settings this to `None` will essentially always re-fetch - /// the project config. - pub current_revision: Option, + pub current_revision: Revision, /// If true, all caches should be skipped and a fresh state should be computed. pub no_cache: bool, @@ -208,3 +206,19 @@ impl FetchOptionalProjectState { self.project_key } } + +/// Response indicating whether a project state needs to be updated +/// or the upstream does not have a newer version. +#[derive(Debug, Clone)] +pub enum SourceProjectState { + /// The upstream sent a [`ProjectState`]. + New(ProjectState), + /// The upstream indicated that there is no newer version of the state available. + NotModified, +} + +impl From for SourceProjectState { + fn from(value: ProjectState) -> Self { + Self::New(value) + } +} diff --git a/relay-server/src/services/projects/source/redis.rs b/relay-server/src/services/projects/source/redis.rs index e2a520f4e7..7109c408cd 100644 --- a/relay-server/src/services/projects/source/redis.rs +++ b/relay-server/src/services/projects/source/redis.rs @@ -5,8 +5,8 @@ use relay_config::Config; use relay_redis::{RedisError, RedisPool}; use relay_statsd::metric; -use crate::services::projects::project::state::UpstreamProjectState; -use crate::services::projects::project::{ParsedProjectState, ProjectState}; +use crate::services::projects::project::{ParsedProjectState, ProjectState, Revision}; +use crate::services::projects::source::SourceProjectState; use crate::statsd::{RelayCounters, RelayHistograms, RelayTimers}; #[derive(Debug, Clone)] @@ -60,13 +60,13 @@ impl RedisProjectSource { pub fn get_config_if_changed( &self, key: ProjectKey, - revision: Option<&str>, - ) -> Result { + revision: Revision, + ) -> Result { let mut client = self.redis.client()?; let mut connection = client.connection()?; // Only check for the revision if we were passed a revision. 
- if let Some(revision) = revision { + if let Some(revision) = revision.as_str() { let current_revision: Option = relay_redis::redis::cmd("GET") .arg(self.get_redis_rev_key(key)) .query(&mut connection) @@ -80,7 +80,7 @@ impl RedisProjectSource { counter(RelayCounters::ProjectStateRedis) += 1, hit = "revision", ); - return Ok(UpstreamProjectState::NotModified); + return Ok(SourceProjectState::NotModified); } } @@ -94,7 +94,7 @@ impl RedisProjectSource { counter(RelayCounters::ProjectStateRedis) += 1, hit = "false" ); - return Ok(UpstreamProjectState::New(ProjectState::Pending)); + return Ok(SourceProjectState::New(ProjectState::Pending)); }; let response = ProjectState::from(parse_redis_response(response.as_slice())?); @@ -106,18 +106,18 @@ impl RedisProjectSource { // // While this is theoretically possible this should always been handled using the above revision // check using the additional Redis key. - if revision.is_some() && response.revision() == revision { + if response.revision() == revision { metric!( counter(RelayCounters::ProjectStateRedis) += 1, hit = "project_config_revision" ); - Ok(UpstreamProjectState::NotModified) + Ok(SourceProjectState::NotModified) } else { metric!( counter(RelayCounters::ProjectStateRedis) += 1, hit = "project_config" ); - Ok(UpstreamProjectState::New(response)) + Ok(SourceProjectState::New(response)) } } diff --git a/relay-server/src/services/projects/source/upstream.rs b/relay-server/src/services/projects/source/upstream.rs index bcbfd5a279..6f051c5ec4 100644 --- a/relay-server/src/services/projects/source/upstream.rs +++ b/relay-server/src/services/projects/source/upstream.rs @@ -18,10 +18,9 @@ use serde::{Deserialize, Serialize}; use tokio::sync::mpsc; use tokio::time::Instant; -use crate::services::projects::project::state::UpstreamProjectState; -use crate::services::projects::project::ParsedProjectState; -use crate::services::projects::project::ProjectState; -use crate::services::projects::source::FetchProjectState; +use crate::services::projects::project::Revision; +use crate::services::projects::project::{ParsedProjectState, ProjectState}; +use crate::services::projects::source::{FetchProjectState, SourceProjectState}; use crate::services::upstream::{ Method, RequestPriority, SendQuery, UpstreamQuery, UpstreamRelay, UpstreamRequestError, }; @@ -41,7 +40,7 @@ pub struct GetProjectStates { /// /// The revisions are mapped by index to the project key, /// this is a separate field to keep the API compatible. - revisions: Vec>, + revisions: Vec, /// If `true` the upstream should return a full configuration. /// /// Upstreams will ignore this for non-internal Relays. @@ -99,10 +98,10 @@ impl UpstreamQuery for GetProjectStates { #[derive(Debug)] struct ProjectStateChannel { // Main broadcast channel. - channel: BroadcastChannel, + channel: BroadcastChannel, // Additional broadcast channels tracked from merge operations. - merged: Vec>, - revision: Option, + merged: Vec>, + revision: Revision, deadline: Instant, no_cache: bool, attempts: u64, @@ -114,8 +113,8 @@ struct ProjectStateChannel { impl ProjectStateChannel { pub fn new( - sender: BroadcastSender, - revision: Option, + sender: BroadcastSender, + revision: Revision, timeout: Duration, no_cache: bool, ) -> Self { @@ -142,18 +141,14 @@ impl ProjectStateChannel { /// If the new revision is different from the contained revision this clears the revision. 
/// To not have multiple fetches per revision per batch, we need to find a common denominator /// for requests with different revisions, which is always to fetch the full project config. - pub fn attach( - &mut self, - sender: BroadcastSender, - revision: Option, - ) { + pub fn attach(&mut self, sender: BroadcastSender, revision: Revision) { self.channel.attach(sender); if self.revision != revision { - self.revision = None; + self.revision = Revision::default(); } } - pub fn send(self, state: UpstreamProjectState) { + pub fn send(self, state: SourceProjectState) { for channel in self.merged { channel.send(state.clone()); } @@ -179,7 +174,7 @@ impl ProjectStateChannel { self.merged.push(channel); self.merged.extend(merged); if self.revision != revision { - self.revision = None; + self.revision = Revision::default(); } self.deadline = self.deadline.max(deadline); self.no_cache |= no_cache; @@ -198,16 +193,16 @@ type ProjectStateChannels = HashMap; /// Internally it maintains the buffer queue of the incoming requests, which got scheduled to fetch the /// state and takes care of the backoff in case there is a problem with the requests. #[derive(Debug)] -pub struct UpstreamProjectSource(FetchProjectState, BroadcastSender); +pub struct UpstreamProjectSource(FetchProjectState, BroadcastSender); impl Interface for UpstreamProjectSource {} impl FromMessage for UpstreamProjectSource { - type Response = BroadcastResponse; + type Response = BroadcastResponse; fn from_message( message: FetchProjectState, - sender: BroadcastSender, + sender: BroadcastSender, ) -> Self { Self(message, sender) } @@ -467,7 +462,7 @@ impl UpstreamProjectSourceService { let mut result = "ok"; let state = if response.unchanged.contains(&key) { result = "ok_unchanged"; - UpstreamProjectState::NotModified + SourceProjectState::NotModified } else { let state = response .configs @@ -485,7 +480,7 @@ impl UpstreamProjectSourceService { ErrorBoundary::Ok(Some(state)) => state.into(), }; - UpstreamProjectState::New(state) + SourceProjectState::New(state) }; metric!( @@ -697,7 +692,7 @@ mod tests { let mut response1 = service.send(FetchProjectState { project_key, - current_revision: Some("123".to_owned()), + current_revision: "123".into(), no_cache: false, }); @@ -708,7 +703,7 @@ mod tests { // request, after responding to the first inflight request. let mut response2 = service.send(FetchProjectState { project_key, - current_revision: None, + current_revision: Revision::default(), no_cache: false, }); @@ -732,8 +727,8 @@ mod tests { .await; let (response1, response2) = futures::future::join(response1, response2).await; - assert!(matches!(response1, Ok(UpstreamProjectState::NotModified))); - assert!(matches!(response2, Ok(UpstreamProjectState::NotModified))); + assert!(matches!(response1, Ok(SourceProjectState::NotModified))); + assert!(matches!(response2, Ok(SourceProjectState::NotModified))); // No more messages to upstream expected. 
assert!(upstream_rx.try_recv().is_err()); diff --git a/relay-server/src/services/spooler/mod.rs b/relay-server/src/services/spooler/mod.rs index 8aa90b7b2c..6201ff9b7a 100644 --- a/relay-server/src/services/spooler/mod.rs +++ b/relay-server/src/services/spooler/mod.rs @@ -54,7 +54,7 @@ use tokio::sync::mpsc; use crate::envelope::{Envelope, EnvelopeError}; use crate::services::outcome::TrackOutcome; use crate::services::processor::ProcessingGroup; -use crate::services::projects::cache::{ProjectCache, RefreshIndexCache, UpdateSpoolIndex}; +use crate::services::projects::cache::legacy::{ProjectCache, RefreshIndexCache, UpdateSpoolIndex}; use crate::services::test_store::TestStore; use crate::statsd::{RelayCounters, RelayGauges, RelayHistograms, RelayTimers}; use crate::utils::{ManagedEnvelope, MemoryChecker}; @@ -156,7 +156,6 @@ impl QueueKey { /// It's sent in response to [`DequeueMany`] message from the [`ProjectCache`]. #[derive(Debug)] pub struct UnspooledEnvelope { - pub key: QueueKey, pub managed_envelope: ManagedEnvelope, } @@ -375,7 +374,6 @@ impl InMemory { self.envelope_count = self.envelope_count.saturating_sub(1); sender .send(UnspooledEnvelope { - key, managed_envelope: envelope, }) .ok(); @@ -585,14 +583,9 @@ impl OnDisk { }; match self.extract_envelope(envelope, services) { - Ok((key, managed_envelopes)) => { + Ok((_, managed_envelopes)) => { for managed_envelope in managed_envelopes { - sender - .send(UnspooledEnvelope { - key, - managed_envelope, - }) - .ok(); + sender.send(UnspooledEnvelope { managed_envelope }).ok(); } } Err(err) => relay_log::error!( @@ -1436,10 +1429,7 @@ mod tests { sender: tx.clone(), }); - let UnspooledEnvelope { - key: _, - managed_envelope, - } = rx.recv().await.unwrap(); + let UnspooledEnvelope { managed_envelope } = rx.recv().await.unwrap(); let received_at_received = managed_envelope.received_at(); // Check if the original start time elapsed to the same second as the restored one. diff --git a/relay-server/src/statsd.rs b/relay-server/src/statsd.rs index 94cd59f771..3e5ed3ec48 100644 --- a/relay-server/src/statsd.rs +++ b/relay-server/src/statsd.rs @@ -7,8 +7,6 @@ pub enum RelayGauges { /// The state of Relay with respect to the upstream connection. /// Possible values are `0` for normal operations and `1` for a network outage. NetworkOutage, - /// The number of items currently in the garbage disposal queue. - ProjectCacheGarbageQueueSize, /// The number of envelopes waiting for project states in memory. /// /// This number is always <= `EnvelopeQueueSize`. @@ -46,13 +44,14 @@ pub enum RelayGauges { /// The number of idle connections in the Redis Pool. #[cfg(feature = "processing")] RedisPoolIdleConnections, + /// The number of notifications in the broadcast channel of the project cache. 
+ ProjectCacheNotificationChannel, } impl GaugeMetric for RelayGauges { fn name(&self) -> &'static str { match self { RelayGauges::NetworkOutage => "upstream.network_outage", - RelayGauges::ProjectCacheGarbageQueueSize => "project_cache.garbage.queue_size", RelayGauges::BufferEnvelopesMemoryCount => "buffer.envelopes_mem_count", RelayGauges::BufferEnvelopesDiskCount => "buffer.envelopes_disk_count", RelayGauges::BufferPeriodicUnspool => "buffer.unspool.periodic", @@ -64,6 +63,9 @@ impl GaugeMetric for RelayGauges { RelayGauges::RedisPoolConnections => "redis.pool.connections", #[cfg(feature = "processing")] RelayGauges::RedisPoolIdleConnections => "redis.pool.idle_connections", + RelayGauges::ProjectCacheNotificationChannel => { + "project_cache.notification_channel.size" + } } } } @@ -395,8 +397,6 @@ pub enum RelayTimers { /// Total time in milliseconds an envelope spends in Relay from the time it is received until it /// finishes processing and has been submitted to the upstream. EnvelopeTotalTime, - /// Total time in milliseconds spent evicting outdated and unused projects happens. - ProjectStateEvictionDuration, /// Total time in milliseconds spent fetching queued project configuration updates requests to /// resolve. /// @@ -487,25 +487,30 @@ pub enum RelayTimers { /// /// - `message`: The type of message that was processed. ProcessMessageDuration, - /// Timing in milliseconds for handling a project cache message. + /// Timing in milliseconds for processing a task in the project cache service. /// /// This metric is tagged with: - /// - `message`: The type of message that was processed. - ProjectCacheMessageDuration, - /// Timing in milliseconds for processing a message in the buffer service. + /// - `task`: The type of the task the project cache does. + ProjectCacheTaskDuration, + /// Timing in milliseconds for handling a legacy project cache message. /// /// This metric is tagged with: - /// /// - `message`: The type of message that was processed. - BufferMessageProcessDuration, - /// Timing in milliseconds for processing a task in the project cache service. + LegacyProjectCacheMessageDuration, + /// Timing in milliseconds for processing a task in the legacy project cache service. /// /// A task is a unit of work the service does. Each branch of the /// `tokio::select` is a different task type. /// /// This metric is tagged with: - /// - `task`: The type of the task the processor does. - ProjectCacheTaskDuration, + /// - `task`: The type of the task the project cache does. + LegacyProjectCacheTaskDuration, + /// Timing in milliseconds for processing a message in the buffer service. + /// + /// This metric is tagged with: + /// + /// - `message`: The type of message that was processed. + BufferMessageProcessDuration, /// Timing in milliseconds for handling and responding to a health check request. 
/// /// This metric is tagged with: @@ -572,7 +577,6 @@ impl TimerMetric for RelayTimers { RelayTimers::EnvelopeWaitTime => "event.wait_time", RelayTimers::EnvelopeProcessingTime => "event.processing_time", RelayTimers::EnvelopeTotalTime => "event.total_time", - RelayTimers::ProjectStateEvictionDuration => "project_state.eviction.duration", RelayTimers::ProjectStateRequestDuration => "project_state.request.duration", #[cfg(feature = "processing")] RelayTimers::ProjectStateDecompression => "project_state.decompression", @@ -585,9 +589,12 @@ impl TimerMetric for RelayTimers { RelayTimers::ReplayRecordingProcessing => "replay.recording.process", RelayTimers::GlobalConfigRequestDuration => "global_config.requests.duration", RelayTimers::ProcessMessageDuration => "processor.message.duration", - RelayTimers::ProjectCacheMessageDuration => "project_cache.message.duration", RelayTimers::BufferMessageProcessDuration => "buffer.message.duration", RelayTimers::ProjectCacheTaskDuration => "project_cache.task.duration", + RelayTimers::LegacyProjectCacheMessageDuration => { + "legacy_project_cache.message.duration" + } + RelayTimers::LegacyProjectCacheTaskDuration => "legacy_project_cache.task.duration", RelayTimers::HealthCheckDuration => "health.message.duration", #[cfg(feature = "processing")] RelayTimers::RateLimitBucketsDuration => "processor.rate_limit_buckets", @@ -683,15 +690,6 @@ pub enum RelayCounters { /// - `invalid`: Data was considered invalid and could not be recovered. The reason indicates /// the validation that failed. Outcomes, - /// Number of times a project state is looked up from the cache. - /// - /// This includes lookups for both cached and new projects. As part of this, updates for - /// outdated or expired project caches are triggered. - /// - /// Related metrics: - /// - `project_cache.hit`: For successful cache lookups, even for outdated projects. - /// - `project_cache.miss`: For failed lookups resulting in an update. - ProjectStateGet, /// Number of project state HTTP requests. /// /// Relay updates projects in batches. Every update cycle, Relay requests @@ -701,15 +699,6 @@ pub enum RelayCounters { /// Note that after an update loop has completed, there may be more projects pending updates. /// This is indicated by `project_state.pending`. ProjectStateRequest, - /// Number of times a project config was requested with `.no-cache`. - /// - /// This effectively counts the number of envelopes or events that have been sent with a - /// corresponding DSN. Actual queries to the upstream may still be deduplicated for these - /// project state requests. - /// - /// A maximum of 1 such requests per second is allowed per project key. This metric counts only - /// permitted requests. - ProjectStateNoCache, /// Number of times a project state is requested from the central Redis cache. 
/// /// This metric is tagged with: @@ -881,9 +870,7 @@ impl CounterMetric for RelayCounters { RelayCounters::BufferSpooledEnvelopes => "buffer.spooled_envelopes", RelayCounters::BufferUnspooledEnvelopes => "buffer.unspooled_envelopes", RelayCounters::Outcomes => "events.outcomes", - RelayCounters::ProjectStateGet => "project_state.get", RelayCounters::ProjectStateRequest => "project_state.request", - RelayCounters::ProjectStateNoCache => "project_state.no_cache", #[cfg(feature = "processing")] RelayCounters::ProjectStateRedis => "project_state.redis.requests", RelayCounters::ProjectUpstreamCompleted => "project_upstream.completed", diff --git a/relay-server/src/testutils.rs b/relay-server/src/testutils.rs index 9f4c255d35..56e9742925 100644 --- a/relay-server/src/testutils.rs +++ b/relay-server/src/testutils.rs @@ -20,6 +20,7 @@ use crate::service::create_redis_pools; use crate::services::global_config::GlobalConfigHandle; use crate::services::outcome::TrackOutcome; use crate::services::processor::{self, EnvelopeProcessorService}; +use crate::services::projects::cache::ProjectCacheHandle; use crate::services::projects::project::ProjectInfo; use crate::services::test_store::TestStore; use crate::utils::{ThreadPool, ThreadPoolBuilder}; @@ -117,7 +118,6 @@ pub fn empty_envelope_with_dsn(dsn: &str) -> Box { pub fn create_test_processor(config: Config) -> EnvelopeProcessorService { let (outcome_aggregator, _) = mock_service("outcome_aggregator", (), |&mut (), _| {}); - let (project_cache, _) = mock_service("project_cache", (), |&mut (), _| {}); let (aggregator, _) = mock_service("aggregator", (), |&mut (), _| {}); let (upstream_relay, _) = mock_service("upstream_relay", (), |&mut (), _| {}); let (test_store, _) = mock_service("test_store", (), |&mut (), _| {}); @@ -132,12 +132,12 @@ pub fn create_test_processor(config: Config) -> EnvelopeProcessorService { create_processor_pool(), Arc::clone(&config), GlobalConfigHandle::fixed(Default::default()), + ProjectCacheHandle::for_test(), Cogs::noop(), #[cfg(feature = "processing")] redis_pools, processor::Addrs { outcome_aggregator, - project_cache, upstream_relay, test_store, #[cfg(feature = "processing")] @@ -162,6 +162,7 @@ pub fn create_test_processor_with_addrs( create_processor_pool(), Arc::clone(&config), GlobalConfigHandle::fixed(Default::default()), + ProjectCacheHandle::for_test(), Cogs::noop(), #[cfg(feature = "processing")] redis_pools, diff --git a/relay-server/src/utils/garbage.rs b/relay-server/src/utils/garbage.rs deleted file mode 100644 index 5b86cc80cd..0000000000 --- a/relay-server/src/utils/garbage.rs +++ /dev/null @@ -1,113 +0,0 @@ -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::{mpsc, Arc}; -use std::thread::JoinHandle; - -/// Garbage disposal agent. -/// -/// Spawns a background thread which drops items sent to it via [`GarbageDisposal::dispose`]. -#[derive(Debug)] -pub struct GarbageDisposal { - tx: mpsc::Sender, - queue_size: Arc, -} - -impl GarbageDisposal { - /// Returns a new instance plus a handle to join on the background thread. - /// Currently only used in tests. 
- fn new_joinable() -> (Self, JoinHandle<()>) { - let (tx, rx) = mpsc::channel(); - - let queue_size = Arc::new(AtomicUsize::new(0)); - let queue_size_clone = queue_size.clone(); - let join_handle = std::thread::spawn(move || { - relay_log::debug!("Start garbage collection thread"); - while let Ok(object) = rx.recv() { - queue_size_clone.fetch_sub(1, Ordering::Relaxed); // Wraps around on overflow - drop(object); - } - relay_log::debug!("Stop garbage collection thread"); - }); - - (Self { tx, queue_size }, join_handle) - } - - /// Spawns a new garbage disposal instance. - /// Every instance has its own background thread that received items to be dropped via - /// [`Self::dispose`]. - /// When the instance is dropped, the background thread stops automatically. - pub fn new() -> Self { - let (instance, _) = Self::new_joinable(); - instance - } - - /// Defers dropping an object by sending it to the background thread. - pub fn dispose(&self, object: U) - where - T: From, - { - self.queue_size.fetch_add(1, Ordering::Relaxed); - self.tx - .send(T::from(object)) - .map_err(|e| { - relay_log::error!("failed to send object to garbage disposal thread, drop here"); - drop(e.0); - }) - .ok(); - } - - /// Get current queue size. - pub fn queue_size(&self) -> usize { - self.queue_size.load(Ordering::Relaxed) - } -} - -impl Default for GarbageDisposal { - fn default() -> Self { - Self::new() - } -} - -#[cfg(test)] -mod tests { - use std::sync::{Arc, Mutex}; - use std::thread::ThreadId; - - use super::GarbageDisposal; - - struct SomeStruct { - thread_ids: Arc>>, - } - - impl Drop for SomeStruct { - fn drop(&mut self) { - self.thread_ids - .lock() - .unwrap() - .push(std::thread::current().id()) - } - } - - #[test] - fn test_garbage_disposal() { - let thread_ids = Arc::new(Mutex::new(Vec::::new())); - - let x1 = SomeStruct { - thread_ids: thread_ids.clone(), - }; - drop(x1); - - let x2 = SomeStruct { - thread_ids: thread_ids.clone(), - }; - - let (garbage, join_handle) = GarbageDisposal::::new_joinable(); - garbage.dispose(x2); - drop(garbage); // breaks the while loop by dropping rx - join_handle.join().ok(); // wait for thread to finish its work - - let thread_ids = thread_ids.lock().unwrap(); - assert_eq!(thread_ids.len(), 2); - assert_eq!(thread_ids[0], std::thread::current().id()); - assert!(thread_ids[0] != thread_ids[1]); - } -} diff --git a/relay-server/src/utils/mod.rs b/relay-server/src/utils/mod.rs index 8d12ee347e..9c428d3b5c 100644 --- a/relay-server/src/utils/mod.rs +++ b/relay-server/src/utils/mod.rs @@ -1,6 +1,5 @@ mod api; mod dynamic_sampling; -mod garbage; mod managed_envelope; mod multipart; mod param_parser; @@ -22,7 +21,6 @@ mod unreal; pub use self::api::*; pub use self::dynamic_sampling::*; -pub use self::garbage::*; pub use self::managed_envelope::*; pub use self::memory::*; pub use self::multipart::*; diff --git a/tests/integration/test_projectconfigs.py b/tests/integration/test_projectconfigs.py index 18f1f2b39d..c10e807cf8 100644 --- a/tests/integration/test_projectconfigs.py +++ b/tests/integration/test_projectconfigs.py @@ -12,6 +12,42 @@ RelayInfo = namedtuple("RelayInfo", ["id", "public_key", "secret_key", "internal"]) +def request_config( + relay, packed, signature, *, version="3", relay_id=None, headers=None +): + return relay.post( + f"/api/0/relays/projectconfigs/?version={version}", + data=packed, + headers={ + "X-Sentry-Relay-Id": relay_id or relay.relay_id, + "X-Sentry-Relay-Signature": signature, + } + | (headers or {}), + ) + + +def get_response(relay, packed, 
signature, *, version="3", relay_id=None, headers=None): + data = None + deadline = time.monotonic() + 15 + while time.monotonic() <= deadline: + response = request_config( + relay, + packed, + signature, + version=version, + relay_id=relay_id, + headers=headers, + ) + assert response.ok + data = response.json() + print(data) + if data["configs"] or data.get("global") or data.get("unchanged"): + return data, response + time.sleep(0.01) + + assert False, "Relay did still not receive a project config from minisentry" + + @pytest.mark.parametrize( "caller, projects", [ @@ -84,16 +120,9 @@ def test_dynamic_relays(mini_sentry, relay, caller, projects): packed, signature = caller.secret_key.pack(request) - resp = relay1.post( - "/api/0/relays/projectconfigs/?version=2", - data=packed, - headers={"X-Sentry-Relay-Id": caller.id, "X-Sentry-Relay-Signature": signature}, - ) - - assert resp.ok + data, _ = get_response(relay1, packed, signature, relay_id=caller.id) # test that it returns valid data - data = resp.json() for p in public_keys: assert data["configs"][p] is not None @@ -104,14 +133,7 @@ def test_invalid_json(mini_sentry, relay): body = {} # missing the required `publicKeys` field packed, signature = SecretKey.parse(relay.secret_key).pack(body) - response = relay.post( - "/api/0/relays/projectconfigs/?version=2", - data=packed, - headers={ - "X-Sentry-Relay-Id": relay.relay_id, - "X-Sentry-Relay-Signature": signature, - }, - ) + response = request_config(relay, packed, signature) assert response.status_code == 400 # Bad Request assert "JSON" in response.text @@ -121,7 +143,7 @@ def test_invalid_signature(mini_sentry, relay): relay = relay(mini_sentry) response = relay.post( - "/api/0/relays/projectconfigs/?version=2", + "/api/0/relays/projectconfigs/?version=3", data='{"publicKeys":[]}', headers={ "X-Sentry-Relay-Id": relay.relay_id, @@ -148,17 +170,9 @@ def test_broken_projectkey(mini_sentry, relay): } packed, signature = SecretKey.parse(relay.secret_key).pack(body) - response = relay.post( - "/api/0/relays/projectconfigs/?version=2", - data=packed, - headers={ - "X-Sentry-Relay-Id": relay.relay_id, - "X-Sentry-Relay-Signature": signature, - }, - ) + data, _ = get_response(relay, packed, signature) - assert response.ok - assert public_key in response.json()["configs"] + assert public_key in data["configs"] def test_pending_projects(mini_sentry, relay): @@ -172,63 +186,17 @@ def test_pending_projects(mini_sentry, relay): body = {"publicKeys": [public_key]} packed, signature = SecretKey.parse(relay.secret_key).pack(body) - def request_config(): - return relay.post( - "/api/0/relays/projectconfigs/?version=3", - data=packed, - headers={ - "X-Sentry-Relay-Id": relay.relay_id, - "X-Sentry-Relay-Signature": signature, - }, - ) - - response = request_config() - + response = request_config(relay, packed, signature) assert response.ok data = response.json() assert public_key in data["pending"] assert public_key not in data["configs"] - deadline = time.monotonic() + 15 - while time.monotonic() <= deadline: - response = request_config() - assert response.ok - data = response.json() - if data["configs"]: - break - else: - assert False, "Relay did still not receive a project config from minisentry" + data, _ = get_response(relay, packed, signature) assert public_key in data["configs"] assert data.get("pending") is None -def request_config(relay, packed, signature, version: str): - return relay.post( - f"/api/0/relays/projectconfigs/?version={version}", - data=packed, - headers={ - "X-Sentry-Relay-Id": 
relay.relay_id, - "X-Sentry-Relay-Signature": signature, - }, - ) - - -def get_response(relay, packed, signature, version="3"): - data = None - deadline = time.monotonic() + 15 - while time.monotonic() <= deadline: - # send 1 r/s - time.sleep(1) - response = request_config(relay, packed, signature, version) - assert response.ok - data = response.json() - if data["configs"]: - break - else: - print("Relay did still not receive a project config from minisentry") - return data - - def test_unparsable_project_config(mini_sentry, relay): project_key = 42 relay_config = { @@ -287,7 +255,7 @@ def assert_clear_test_failures(): assert_clear_test_failures() # The state should be fixed and updated by now, since we keep re-trying to fetch new one all the time. - data = get_response(relay, packed, signature) + data, _ = get_response(relay, packed, signature) assert data["configs"][public_key]["projectId"] == project_key assert not data["configs"][public_key]["disabled"] time.sleep(1) @@ -326,7 +294,7 @@ def test_cached_project_config(mini_sentry, relay): body = {"publicKeys": [public_key]} packed, signature = SecretKey.parse(relay.secret_key).pack(body) - data = get_response(relay, packed, signature) + data, _ = get_response(relay, packed, signature) assert data["configs"][public_key]["projectId"] == project_key assert not data["configs"][public_key]["disabled"] @@ -336,7 +304,7 @@ def test_cached_project_config(mini_sentry, relay): # Give it a bit time for update to go through. time.sleep(1) - data = get_response(relay, packed, signature) + data, _ = get_response(relay, packed, signature) assert data["configs"][public_key]["projectId"] == project_key assert not data["configs"][public_key]["disabled"] @@ -373,7 +341,7 @@ def test_get_global_config(mini_sentry, relay): body = {"publicKeys": [], "global": True} packed, signature = SecretKey.parse(relay.secret_key).pack(body) - data = get_response(relay, packed, signature, version="3") + data, _ = get_response(relay, packed, signature, version="3") global_extraction_config = data["global"].pop("metricExtraction") assert "span_metrics_common" in global_extraction_config["groups"] @@ -389,19 +357,11 @@ def test_compression(mini_sentry, relay): body = {"publicKeys": [public_key]} packed, signature = SecretKey.parse(relay.secret_key).pack(body) - response = relay.post( - "/api/0/relays/projectconfigs/?version=2", - data=packed, - headers={ - "Accept-Encoding": "gzip", - "X-Sentry-Relay-Id": relay.relay_id, - "X-Sentry-Relay-Signature": signature, - }, + data, response = get_response( + relay, packed, signature, headers={"Accept-Encoding": "gzip"} ) - - assert response.ok assert response.headers["content-encoding"] == "gzip" - assert public_key in response.json()["configs"] + assert public_key in data["configs"] def test_unchanged_projects(mini_sentry, relay): @@ -413,32 +373,14 @@ def test_unchanged_projects(mini_sentry, relay): body = {"publicKeys": [public_key], "revisions": ["123"]} packed, signature = SecretKey.parse(relay.secret_key).pack(body) - def request_config(): - return relay.post( - "/api/0/relays/projectconfigs/?version=3", - data=packed, - headers={ - "X-Sentry-Relay-Id": relay.relay_id, - "X-Sentry-Relay-Signature": signature, - }, - ) - - response = request_config() + response = request_config(relay, packed, signature) assert response.ok data = response.json() assert public_key in data["pending"] assert public_key not in data.get("unchanged", []) - deadline = time.monotonic() + 15 - while time.monotonic() <= deadline: - response = 
request_config() - assert response.ok - data = response.json() - if data.get("unchanged"): - break - else: - assert False, "Relay did still not receive a project config from minisentry" + data, response = get_response(relay, packed, signature) assert public_key in data["unchanged"] assert public_key not in data["configs"] diff --git a/tests/integration/test_query.py b/tests/integration/test_query.py index a934319c2c..d7ab5cc2e3 100644 --- a/tests/integration/test_query.py +++ b/tests/integration/test_query.py @@ -16,7 +16,12 @@ def test_local_project_config(mini_sentry, relay): project_id = 42 config = mini_sentry.basic_project_config(project_id) relay_config = { - "cache": {"file_interval": 1, "project_expiry": 1, "project_grace_period": 0} + "cache": { + "file_interval": 1, + "project_expiry": 1, + "project_grace_period": 0, + "eviction_interval": 1, + } } relay = relay(mini_sentry, relay_config, wait_health_check=False) relay.config_dir.mkdir("projects").join("42.json").write( @@ -45,17 +50,14 @@ def test_local_project_config(mini_sentry, relay): assert event["logentry"] == {"formatted": "Hello, World!"} relay.config_dir.join("projects").join("42.json").write( - json.dumps({"disabled": True}) + json.dumps({"disabled": True, "publicKeys": config["publicKeys"]}) ) - time.sleep(2) - try: - # This may or may not respond with 403, depending on how quickly the future to fetch project - # states executes. - relay.send_event(project_id, dsn_key=dsn_key) - except HTTPError: - pass + time.sleep(2) + relay.send_event(project_id, dsn_key=dsn_key) + time.sleep(0.3) + pytest.raises(HTTPError, lambda: relay.send_event(project_id, dsn_key=dsn_key)) pytest.raises(queue.Empty, lambda: mini_sentry.captured_events.get(timeout=1)) @@ -80,6 +82,7 @@ def get_project_config(): "miss_expiry": 1, "project_expiry": 1, "project_grace_period": grace_period, + "eviction_interval": 1, } }, ) @@ -324,7 +327,8 @@ def get_project_config(): "cache": { "miss_expiry": 1, "project_expiry": 1, - "project_grace_period": 0, + "project_grace_period": 5, + "eviction_interval": 1, } }, ) @@ -343,8 +347,8 @@ def get_project_config(): relay.send_event(42) - assert project_config_fetch.wait(timeout=1) - assert with_rev.wait(timeout=1) + assert project_config_fetch.wait(timeout=2) + assert with_rev.wait(timeout=2) event = mini_sentry.captured_events.get(timeout=1).get_event() assert event["logentry"] == {"formatted": "Hello, World!"}
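
Note on the `Revision` type used throughout the source hunks above: its definition lives in `relay-server/src/services/projects/project/mod.rs` and is not part of the diff shown here. The following is only a minimal sketch inferred from the visible call sites (`Revision::default()`, `"123".into()`, `revision.as_str()`, cloning, and equality checks); the actual type may differ in detail, for example it presumably also derives `Serialize`/`Deserialize` so it can be sent in the `GetProjectStates` query.

```rust
use std::sync::Arc;

/// Sketch of a revision marker for project configs (assumed shape, not the
/// actual definition from the patch).
///
/// `Revision::default()` means "no known revision" and plays the role of the
/// old `None`, which always results in a full project config fetch.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Revision(Option<Arc<str>>);

impl Revision {
    /// Returns the revision as a string slice, or `None` if no revision is known.
    pub fn as_str(&self) -> Option<&str> {
        self.0.as_deref()
    }
}

impl From<&str> for Revision {
    /// Allows call sites like `"123".into()`, mirroring the old
    /// `Some("123".to_owned())`.
    fn from(value: &str) -> Self {
        Self(Some(Arc::from(value)))
    }
}
```

Under this reading, `Revision::default()` replaces the old `None` and `"123".into()` replaces `Some("123".to_owned())`, which matches how the upstream source tests and the Redis revision check in the hunks above use the type.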