@@ -418,7 +418,7 @@ def _subsample_paired_fastq_files( # pylint:disable=too-many-branches
418
418
else :
419
419
raise FileNotFoundError ("No fastq file found" )
420
420
421
- if fastq_file_1 .suffix .endswith (".gz$ " ):
421
+ if fastq_file_1 .suffix .endswith (".gz" ):
422
422
compressed = True
423
423
num_lines = sum (1 for line in gzip .open (fastq_file_1 )) # pylint:disable=consider-using-with
424
424
else :
@@ -443,8 +443,8 @@ def _subsample_paired_fastq_files( # pylint:disable=too-many-branches
443
443
)
444
444
return
445
445
446
- rand_list = random .sample (range (0 , range_limit - 1 ), sampling_size )
447
- random_indices = { idx * 4 : 1 for idx in rand_list }
446
+ rand_list = random .sample (range (0 , range_limit - 1 ), sampling_size )
447
+ random_indices = [ idx * 4 for idx in rand_list ]
448
448
logging .info ("Processing paired files in parallel" )
449
449
if num_threads >= 2 :
450
450
pool = multiprocessing .Pool (int (num_threads )) # pylint:disable=consider-using-with
@@ -498,6 +498,29 @@ def _subsample_fastq_subset(
498
498
compressed : the file is compressed
499
499
"""
500
500
line_index = 0
501
+ read_block = []
502
+ with gzip .open (fastq_file , "rt" ) if compressed else open (fastq_file ) as file_in , open (
503
+ output_file , "w+"
504
+ ) as file_out :
505
+ for line in file_in :
506
+ read_block .append (line )
507
+ if len (read_block ) == 4 :
508
+ if line_index in random_indices :
509
+ file_out .writelines (read_block )
510
+ read_block = []
511
+ line_index += 4
512
+ #lines = [file_in.readline() for _ in range(4)]
513
+ """
514
+ while lines[3]:
515
+ #lines = [file_in.readline() for _ in range(4)]
516
+ # Break if the end of the file is reached
517
+ if len(lines) < 4 : # No more lines to read
518
+ break
519
+ # Write to output if current index is in random_indices
520
+ if line_index in random_indices:
521
+ file_out.writelines(lines)
522
+ line_index += 4
523
+ lines = [file_in.readline() for _ in range(4)]
501
524
502
525
with gzip.open(fastq_file, "rt") if compressed else open(fastq_file) as file_in, open(
503
526
output_file, "w+"
@@ -508,7 +531,7 @@ def _subsample_fastq_subset(
508
531
file_out.writelines(lines)
509
532
line_index += 4
510
533
lines = [file_in.readline() for _ in range(4)]
511
-
534
+ """
512
535
513
536
def run_trimming (
514
537
output_dir : Path ,
@@ -609,9 +632,9 @@ def multiprocess_trim_galore(trim_galore_cmd: List, fastq_paired_files: List[Pat
609
632
def parse_args ():
610
633
"""Parse command line arguments."""
611
634
parser = argparse .ArgumentParser (description = "STAR's arguments" )
612
- parser .add_argument ("--genome_file" , required = True , help = "Genome file path" )
613
- parser .add_argument ("--output_dir" , required = True , help = "Output directory path" )
614
- parser .add_argument ("--short_read_fastq_dir" , required = True , help = "Short read directory path" )
635
+ parser .add_argument ("--genome_file" , help = "Genome file path" )
636
+ parser .add_argument ("--output_dir" , help = "Output directory path" )
637
+ parser .add_argument ("--short_read_fastq_dir" , help = "Short read directory path" )
615
638
parser .add_argument (
616
639
"--delete_pre_trim_fastq" ,
617
640
action = "store_true" ,
@@ -682,7 +705,6 @@ def parse_args():
682
705
parser .add_argument (
683
706
"--run_star" ,
684
707
type = bool ,
685
- default = True ,
686
708
help = "If True will run STAR alignment given an input dataset of fastq files." ,
687
709
required = False ,
688
710
)
0 commit comments