@@ -11,18 +11,22 @@ use {
         types::{Envelope, EnvelopeType0},
     },
     base64::Engine,
-    helpers::update_message_processing_status,
+    helpers::{dead_letter_give_up_check, update_message_processing_status},
     relay_client::http::Client,
     relay_rpc::{
         domain::{ClientId, DecodedClientId, Topic},
         rpc::{msg_id::MsgId, Publish, JSON_RPC_VERSION_STR},
     },
     sqlx::{postgres::PgListener, PgPool},
-    std::sync::{
-        atomic::{AtomicUsize, Ordering},
-        Arc,
+    std::{
+        sync::{
+            atomic::{AtomicUsize, Ordering},
+            Arc,
+        },
+        time::Duration,
     },
-    tracing::{info, instrument, warn},
+    tokio::time::{interval, timeout},
+    tracing::{error, info, instrument, warn},
     types::SubscriberNotificationStatus,
     wc::metrics::otel::Context,
 };
@@ -31,12 +35,19 @@ pub mod helpers;
 pub mod types;
 
 // TODO: These should be configurable, add to the config
-// Maximum of the parallel messages processing workers
+/// Maximum number of parallel message-processing workers
 const MAX_WORKERS: usize = 10;
-// Number of workers to be spawned on the service start to clean the queue
+/// Number of workers spawned at service start to drain the queue
 const START_WORKERS: usize = 10;
 // Messages queue stats observing database polling interval
 const QUEUE_STATS_POLLING_INTERVAL: std::time::Duration = std::time::Duration::from_secs(60);
+/// Maximum publishing time before the publish is considered failed
+/// and messages in the queue with the `processing` state are returned to the queue
+const PUBLISHING_TIMEOUT: Duration = Duration::from_secs(60 * 5); // 5 minutes
+/// Interval at which to check for dead letters
+const DEAD_LETTER_POLL_INTERVAL: Duration = Duration::from_secs(60);
+/// Total maximum time to process a message before it is considered failed
+const PUBLISHING_GIVE_UP_TIMEOUT: Duration = Duration::from_secs(60 * 60 * 24); // One day
 
 #[instrument(skip_all)]
 pub async fn start(
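The new timeouts land next to the existing constants and are still hard-coded, as the TODO above notes. A minimal sketch of what env-based overrides could look like, assuming hypothetical variable names (`PUBLISHING_TIMEOUT_SECS`, etc.) that are not part of this PR:

```rust
use std::time::Duration;

// Parse a duration in whole seconds from an env var, falling back to a default.
// The variable names below are illustrative assumptions, not the service's config.
fn duration_from_env(var: &str, default_secs: u64) -> Duration {
    std::env::var(var)
        .ok()
        .and_then(|v| v.parse::<u64>().ok())
        .map(Duration::from_secs)
        .unwrap_or(Duration::from_secs(default_secs))
}

fn main() {
    let publishing_timeout = duration_from_env("PUBLISHING_TIMEOUT_SECS", 60 * 5);
    let dead_letter_poll_interval = duration_from_env("DEAD_LETTER_POLL_INTERVAL_SECS", 60);
    let give_up_timeout = duration_from_env("PUBLISHING_GIVE_UP_TIMEOUT_SECS", 60 * 60 * 24);
    println!("{publishing_timeout:?} {dead_letter_poll_interval:?} {give_up_timeout:?}");
}
```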
@@ -59,7 +70,20 @@ pub async fn start(
         });
     }
 
-    // TODO: Spawned tasks counter should be exported to metrics
+    // Spawning dead letters check task
+    tokio::spawn({
+        let postgres = postgres.clone();
+        async move {
+            let mut poll_interval = interval(DEAD_LETTER_POLL_INTERVAL);
+            loop {
+                poll_interval.tick().await;
+                if let Err(e) = helpers::dead_letters_check(PUBLISHING_TIMEOUT, &postgres).await {
+                    warn!("Error on dead letters check: {:?}", e);
+                }
+            }
+        }
+    });
+
     let spawned_tasks_counter = Arc::new(AtomicUsize::new(0));
 
     // Spawning initial workers to process messages from the queue in case
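The task above polls `helpers::dead_letters_check` every `DEAD_LETTER_POLL_INTERVAL`, but the helper's body is not shown in this diff. A minimal sketch of what it presumably does (return messages stuck in the `processing` state longer than `PUBLISHING_TIMEOUT` back to `queued`), with assumed table and column names:

```rust
use std::time::Duration;

use sqlx::PgPool;

// Sketch only: the table/column names and status values are assumptions,
// not taken from this repository's schema.
async fn dead_letters_check(threshold: Duration, postgres: &PgPool) -> Result<(), sqlx::Error> {
    sqlx::query(
        "UPDATE subscriber_notification_status
         SET status = 'queued', updated_at = now()
         WHERE status = 'processing'
           AND updated_at < now() - $1::interval",
    )
    .bind(format!("{} seconds", threshold.as_secs()))
    .execute(postgres)
    .await?;
    Ok(())
}
```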
@@ -176,16 +200,36 @@ async fn process_queued_messages(
         if let Some(notification) = result {
             let notification_id = notification.subscriber_notification;
             info!("Got a notification with id: {}", notification_id);
-            process_notification(notification, relay_http_client.clone(), metrics, analytics)
-                .await?;
 
-            update_message_processing_status(
-                notification_id,
-                SubscriberNotificationStatus::Published,
-                postgres,
+            let process_result = process_with_timeout(
+                PUBLISHING_TIMEOUT,
+                notification,
+                relay_http_client.clone(),
                 metrics,
-            )
-            .await?;
+                analytics,
+            );
+
+            match process_result.await {
+                Ok(_) => {
+                    update_message_processing_status(
+                        notification_id,
+                        SubscriberNotificationStatus::Published,
+                        postgres,
+                        metrics,
+                    )
+                    .await?;
+                }
+                Err(e) => {
+                    warn!("Error on `process_notification`: {:?}", e);
+                    update_message_status_queued_or_failed(
+                        notification_id,
+                        PUBLISHING_GIVE_UP_TIMEOUT,
+                        postgres,
+                        metrics,
+                    )
+                    .await?;
+                }
+            }
         } else {
             info!("No more notifications to process, stopping the loop");
             break;
@@ -194,6 +238,31 @@ async fn process_queued_messages(
     Ok(())
 }
 
+/// Process publishing with the threshold timeout
+#[instrument(skip(relay_http_client, metrics, analytics, notification))]
+async fn process_with_timeout(
+    execution_threshold: Duration,
+    notification: NotificationToProcess,
+    relay_http_client: Arc<Client>,
+    metrics: Option<&Metrics>,
+    analytics: &NotifyAnalytics,
+) -> crate::error::Result<()> {
+    match timeout(
+        execution_threshold,
+        process_notification(notification, relay_http_client.clone(), metrics, analytics),
+    )
+    .await
+    {
+        Ok(result) => {
+            result?;
+        }
+        Err(e) => {
+            return Err(crate::error::Error::TokioTimeElapsed(e));
+        }
+    };
+    Ok(())
+}
+
 #[instrument(skip_all, fields(notification = ?notification))]
 async fn process_notification(
     notification: NotificationToProcess,
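`process_with_timeout` above is a thin wrapper around `tokio::time::timeout`, which drops the wrapped future once the deadline passes and hands back an `Elapsed` error for the caller to map into the service's own error type. A standalone sketch of the same pattern, where `slow_publish` is a stand-in for the real relay call:

```rust
use std::time::Duration;

use tokio::time::{sleep, timeout};

#[tokio::main]
async fn main() {
    // Stand-in for a publish that takes longer than the deadline allows.
    let slow_publish = async {
        sleep(Duration::from_secs(10)).await;
        Ok::<(), &'static str>(())
    };

    match timeout(Duration::from_millis(100), slow_publish).await {
        // The inner future finished before the deadline; its own Result is inside.
        Ok(result) => println!("finished in time: {result:?}"),
        // The deadline passed first; the inner future was dropped (cancelled).
        Err(elapsed) => println!("gave up after deadline: {elapsed}"),
    }
}
```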
@@ -263,3 +332,34 @@ async fn process_notification(
 
     Ok(())
 }
+
+/// Updates the message status back to `Queued` or marks it as `Failed`,
+/// depending on the message creation time and the `giveup_threshold`
+#[instrument(skip(postgres, metrics))]
+async fn update_message_status_queued_or_failed(
+    notification_id: uuid::Uuid,
+    giveup_threshold: Duration,
+    postgres: &PgPool,
+    metrics: Option<&Metrics>,
+) -> crate::error::Result<()> {
+    if dead_letter_give_up_check(notification_id, giveup_threshold, postgres).await? {
+        error!("Message was not processed within the give-up threshold, marking it as failed");
+        update_message_processing_status(
+            notification_id,
+            SubscriberNotificationStatus::Failed,
+            postgres,
+            metrics,
+        )
+        .await?;
+    } else {
+        update_message_processing_status(
+            notification_id,
+            SubscriberNotificationStatus::Queued,
+            postgres,
+            metrics,
+        )
+        .await?;
+    }
+
+    Ok(())
+}
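`dead_letter_give_up_check` is imported from `helpers` but its body is not shown in this diff. A minimal sketch of what it presumably does: report whether the notification was created longer ago than the give-up threshold. The table and column names are assumptions.

```rust
use std::time::Duration;

use sqlx::PgPool;

// Sketch only: `subscriber_notification` and `created_at` are assumed names,
// not taken from this repository's schema.
async fn dead_letter_give_up_check(
    notification_id: uuid::Uuid,
    giveup_threshold: Duration,
    postgres: &PgPool,
) -> Result<bool, sqlx::Error> {
    // True when the row is older than the threshold, i.e. time to give up.
    let (older,): (bool,) = sqlx::query_as(
        "SELECT created_at < now() - $2::interval
         FROM subscriber_notification
         WHERE id = $1",
    )
    .bind(notification_id)
    .bind(format!("{} seconds", giveup_threshold.as_secs()))
    .fetch_one(postgres)
    .await?;
    Ok(older)
}
```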