@@ -312,28 +312,43 @@ async def add_batch_of_requests(
         unprocessed_requests = list[UnprocessedRequest]()
         state = self._state.current_value

-        # Prepare a dictionary to track existing requests by their unique keys.
-        existing_unique_keys: dict[str, Path] = {}
-        existing_request_files = await self._get_request_files(self.path_to_rq)
+        all_requests = state.forefront_requests | state.regular_requests

-        for request_file in existing_request_files:
-            existing_request = await self._parse_request_file(request_file)
-            if existing_request is not None:
-                existing_unique_keys[existing_request.unique_key] = request_file
+        requests_to_enqueue = {}

-        # Process each request in the batch.
+        # Determine which requests can be added and which need to be modified.
         for request in requests:
-            existing_request_file = existing_unique_keys.get(request.unique_key)
-            existing_request = None
-
-            # Only load the full request from disk if we found a duplicate
-            if existing_request_file is not None:
-                existing_request = await self._parse_request_file(existing_request_file)
+            # Check if the request has already been handled.
+            if request.unique_key in state.handled_requests:
+                processed_requests.append(
+                    ProcessedRequest(
+                        unique_key=request.unique_key,
+                        was_already_present=True,
+                        was_already_handled=True,
+                    )
+                )
+            # Check if the request is already in progress, or if it is already in the queue
+            # and the `forefront` flag is not used; in that case we do not change the
+            # position of the request.
+            elif (request.unique_key in state.in_progress_requests) or (
+                request.unique_key in all_requests and not forefront
+            ):
+                processed_requests.append(
+                    ProcessedRequest(
+                        unique_key=request.unique_key,
+                        was_already_present=True,
+                        was_already_handled=False,
+                    )
+                )
+            # These requests must either be added or have their position updated.
+            else:
+                requests_to_enqueue[request.unique_key] = request

-            # If there is no existing request with the same unique key, add the new request.
-            if existing_request is None:
+        # Process each request in the batch.
+        for request in requests_to_enqueue.values():
+            # If the request is not already in the RQ, this is a new request.
+            if request.unique_key not in all_requests:
                 request_path = self._get_request_path(request.unique_key)
-
                 # Add sequence number to ensure FIFO ordering using state.
                 if forefront:
                     sequence_number = state.forefront_sequence_counter
@@ -352,9 +367,6 @@ async def add_batch_of_requests(
                 new_total_request_count += 1
                 new_pending_request_count += 1

-                # Add to our index for subsequent requests in this batch
-                existing_unique_keys[request.unique_key] = self._get_request_path(request.unique_key)
-
                 processed_requests.append(
                     ProcessedRequest(
                         unique_key=request.unique_key,
@@ -363,57 +375,33 @@ async def add_batch_of_requests(
                     )
                 )

-            # If the request already exists in the RQ, just update it if needed.
-            else:
-                # Set the processed request flags.
-                was_already_present = existing_request is not None
-                was_already_handled = existing_request.unique_key in state.handled_requests
-
-                # If the request is already in the RQ and handled, just continue with the next one.
-                if was_already_present and was_already_handled:
-                    processed_requests.append(
-                        ProcessedRequest(
-                            unique_key=request.unique_key,
-                            was_already_present=True,
-                            was_already_handled=True,
-                        )
-                    )
+            # If the request already exists in the RQ and the `forefront` flag is used, update its position.
+            elif forefront:
+                # If the request is among the `regular` requests, remove it from its current position.
+                if request.unique_key in state.regular_requests:
+                    state.regular_requests.pop(request.unique_key)

-                # If the request is already in the RQ but not handled yet, update it.
-                elif was_already_present and not was_already_handled:
-                    # Update request type (forefront vs regular) in state
-                    if forefront:
-                        # Move from regular to forefront if needed
-                        if existing_request.unique_key in state.regular_requests:
-                            state.regular_requests.pop(existing_request.unique_key)
-                        if existing_request.unique_key not in state.forefront_requests:
-                            state.forefront_requests[existing_request.unique_key] = state.forefront_sequence_counter
-                            state.forefront_sequence_counter += 1
-                    elif (
-                        existing_request.unique_key not in state.forefront_requests
-                        and existing_request.unique_key not in state.regular_requests
-                    ):
-                        # Keep as regular if not already forefront
-                        state.regular_requests[existing_request.unique_key] = state.sequence_counter
-                        state.sequence_counter += 1
-
-                    processed_requests.append(
-                        ProcessedRequest(
-                            unique_key=request.unique_key,
-                            was_already_present=True,
-                            was_already_handled=False,
-                        )
+                # Add the request to `forefront`, or just update its position if it is already there.
+                state.forefront_requests[request.unique_key] = state.forefront_sequence_counter
+                state.forefront_sequence_counter += 1
+
+                processed_requests.append(
+                    ProcessedRequest(
+                        unique_key=request.unique_key,
+                        was_already_present=True,
+                        was_already_handled=False,
                     )
+                )

-                else:
-                    logger.warning(f'Request with unique key "{request.unique_key}" could not be processed.')
-                    unprocessed_requests.append(
-                        UnprocessedRequest(
-                            unique_key=request.unique_key,
-                            url=request.url,
-                            method=request.method,
-                        )
+            else:
+                logger.warning(f'Request with unique key "{request.unique_key}" could not be processed.')
+                unprocessed_requests.append(
+                    UnprocessedRequest(
+                        unique_key=request.unique_key,
+                        url=request.url,
+                        method=request.method,
                     )
+                )

         await self._update_metadata(
             update_modified_at=True,
@@ -752,10 +740,7 @@ async def _get_request_files(cls, path_to_rq: Path) -> list[Path]:
         files = await asyncio.to_thread(list, path_to_rq.glob('*.json'))

         # Filter out metadata file and non-file entries.
-        filtered = filter(
-            lambda request_file: request_file.is_file() and request_file.name != METADATA_FILENAME,
-            files,
-        )
+        filtered = filter(lambda request_file: request_file.is_file() and request_file.name != METADATA_FILENAME, files)

         return list(filtered)
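
The core of the change above is that `add_batch_of_requests` now classifies the incoming batch against the in-memory state instead of re-reading request files from disk. Below is a minimal, self-contained sketch of that classification step only; the field names mirror the diff, but `QueueState` and `classify_batch` are invented here for illustration and are not part of the actual implementation.

```python
from dataclasses import dataclass, field


@dataclass
class QueueState:
    """Simplified stand-in for the RQ state fields referenced in the diff (illustration only)."""

    handled_requests: set[str] = field(default_factory=set)
    in_progress_requests: set[str] = field(default_factory=set)
    regular_requests: dict[str, int] = field(default_factory=dict)  # unique_key -> sequence number
    forefront_requests: dict[str, int] = field(default_factory=dict)  # unique_key -> sequence number


def classify_batch(state: QueueState, unique_keys: list[str], *, forefront: bool) -> dict[str, list[str]]:
    """Split a batch of unique keys the same way the rewritten loop does.

    Keys fall into three buckets: already handled, left where they are,
    or to be enqueued (new requests, or existing ones moved by `forefront`).
    """
    all_requests = state.forefront_requests | state.regular_requests
    result: dict[str, list[str]] = {'already_handled': [], 'unchanged': [], 'to_enqueue': []}

    for key in unique_keys:
        if key in state.handled_requests:
            result['already_handled'].append(key)
        elif key in state.in_progress_requests or (key in all_requests and not forefront):
            result['unchanged'].append(key)
        else:
            result['to_enqueue'].append(key)

    return result


if __name__ == '__main__':
    state = QueueState(
        handled_requests={'a'},
        in_progress_requests={'b'},
        regular_requests={'c': 0},
    )
    # 'a' was handled, 'b' is in progress, 'c' is already queued, 'd' is new.
    print(classify_batch(state, ['a', 'b', 'c', 'd'], forefront=True))
    # {'already_handled': ['a'], 'unchanged': ['b'], 'to_enqueue': ['c', 'd']}
    print(classify_batch(state, ['a', 'b', 'c', 'd'], forefront=False))
    # {'already_handled': ['a'], 'unchanged': ['b', 'c'], 'to_enqueue': ['d']}
```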