Skip to content

Commit

Permalink
Merge pull request #49 from sanger-tol/reworked_github_test
Browse files Browse the repository at this point in the history
Reworked GitHub test
  • Loading branch information
ksenia-krasheninnikova authored Aug 14, 2024
2 parents 5869d2a + 3e9f5b0 commit db683a4
Show file tree
Hide file tree
Showing 18 changed files with 35 additions and 312 deletions.
6 changes: 1 addition & 5 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,6 @@ jobs:
run: |
nextflow secrets set NCBI_API_KEY ${{ secrets.NCBI_API_KEY }}
- name: Download test data
run: |
curl https://tolit.cog.sanger.ac.uk/test-data/resources/genomeassembly/genomeassembly_test_data.tar.gz | tar xzf -
- name: Setup apptainer
uses: eWaterCycle/setup-apptainer@main

Expand All @@ -62,4 +58,4 @@ jobs:
- name: Run pipeline with test data
run: |
nextflow run ${GITHUB_WORKSPACE} -profile test_github,singularity --outdir ./results
nextflow run ${GITHUB_WORKSPACE} -profile test,singularity --outdir ./results
13 changes: 8 additions & 5 deletions assets/test.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
dataset:
id: baUndUnlc1
illumina_10X:
reads: /lustre/scratch123/tol/resources/nextflow/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/10x/
reads:
- https://tolit.cog.sanger.ac.uk/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/10x/baUndUnlc1_S12_L002_R1_001.fastq.gz
- https://tolit.cog.sanger.ac.uk/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/10x/baUndUnlc1_S12_L002_R2_001.fastq.gz
- https://tolit.cog.sanger.ac.uk/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/10x/baUndUnlc1_S12_L002_I1_001.fastq.gz
pacbio:
reads:
- reads: /lustre/scratch124/tol/projects/darwin/users/kk16/development/test/test/HiFi.reads.BIG.fasta
- reads: https://tolit.cog.sanger.ac.uk/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/pacbio/fasta/HiFi.reads.fasta
HiC:
reads:
- reads: /lustre/scratch123/tol/resources/nextflow/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/hic-arima2/41741_2#7.sub.cram
- reads: https://tolit.cog.sanger.ac.uk/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/hic-arima2/41741_2%237.sub.cram
hic_motif: GATC,GANTC,CTNAG,TTAA
hic_aligner: bwamem2
busco:
Expand All @@ -16,6 +19,6 @@ mito:
species: Caradrina clavipalpis
min_length: 15000
code: 5
fam: /lustre/scratch124/tol/projects/darwin/users/cz3/organelle_asm/hmm_db/insecta_mito.fam
fam: https://github.com/c-zhou/OatkDB/raw/main/v20230921/insecta_mito.fam
plastid:
fam: /lustre/scratch124/tol/projects/darwin/users/cz3/organelle_asm/hmm_db/acrogymnospermae_pltd.fam
fam: https://github.com/c-zhou/OatkDB/raw/main/v20230921/acrogymnospermae_pltd.fam
19 changes: 0 additions & 19 deletions assets/test_github.yaml

This file was deleted.

7 changes: 4 additions & 3 deletions bin/generate_cram_csv.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ for cram in "$@"; do
rgline=$(samtools view -H $cram|grep "RG"|sed 's/\t/\\t/g'|sed "s/'//g")

crampath=$(readlink -f ${cram})
craipath=$(readlink -f ${cram}.crai)

ncontainers=$(zcat ${crampath}.crai|wc -l)
ncontainers=$(zcat ${craipath} | wc -l)
base=$(basename $cram .cram)

from=0
Expand All @@ -22,15 +23,15 @@ for cram in "$@"; do

while [ $to -lt $ncontainers ]
do
echo $crampath,${crampath}.crai,${from},${to},${base},${chunkn},${rgline}
echo $crampath,${craipath},${from},${to},${base},${chunkn},${rgline}
from=$((to+1))
((to+=10000))
((chunkn++))
done

if [ $from -le $ncontainers ]
then
echo $crampath,${crampath}.crai,${from},${ncontainers},${base},${chunkn},${rgline}
echo $crampath,${craipath},${from},${ncontainers},${base},${chunkn},${rgline}
((chunkn++))
fi
done
Expand Down
35 changes: 6 additions & 29 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,9 @@ process {
// Set up of the polishing pipeline
if (params.polishing_on) {
withName: LONGRANGER_MKREF {
if(System.getenv('GITHUB_ACTION') != null ) {
container = "ghcr.io/sanger-tol/longranger:2.2.2-c4"
}
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasm}/polishing" },
mode: params.publish_dir_mode,
Expand All @@ -368,6 +371,9 @@ process {
withName: LONGRANGER_ALIGN {
// Keep in sync with `longranger_lsf_sanger.config`
ext.args = "--disable-ui --nopreflight"
if(System.getenv('GITHUB_ACTION') != null ) {
container = "ghcr.io/sanger-tol/longranger:2.2.2-c4"
}
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasm}/polishing" },
mode: params.publish_dir_mode,
Expand Down Expand Up @@ -674,33 +680,4 @@ profiles {
}
}
}

test_github {
process {
// Set up of the scaffolding pipeline
withName: 'YAHS' {
// Skip the initial assembly error correction step
ext.args = '-r 1000,2000,5000'
}

withName: '.*HIFIASM.*' {
// Skip bloom filter
ext.args = '--primary -f0'
}

withName: '.*OATK' {
// Set kmer size and minimal coverage
ext.args = "-k1001 -c5 -Ttmp"
}

if (params.polishing_on) {
withName: LONGRANGER_MKREF {
container = "ghcr.io/sanger-tol/longranger:2.2.2-c4"
}
withName: LONGRANGER_ALIGN {
container = "ghcr.io/sanger-tol/longranger:2.2.2-c4"
}
}
}
}
}
7 changes: 3 additions & 4 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,14 @@ params {
config_profile_name = 'Test profile'
config_profile_description = 'Minimal test dataset to check pipeline function'

// Limit resources so that this can run on GitHub Actions
max_cpus = 2
max_memory = '6.GB'
// Match resource limits with the ubuntu2204-4c runner
max_cpus = 4
max_memory = '15.GB'
max_time = '6.h'

// Input data
input = "${projectDir}/assets/test.yaml"
bed_chunks_polishing = 2
organelles_on = true
polishing_on = true
hifiasm_hic_on = true
}
28 changes: 0 additions & 28 deletions conf/test_github.config

This file was deleted.

9 changes: 2 additions & 7 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,17 +86,12 @@ mito:
The pipeline can be tested locally using a provided small test dataset:
```
cd ${GENOMEASSEMBLY_TEST_DATA}
curl https://darwin.cog.sanger.ac.uk/genomeassembly_test_data.tar.gz | tar xzf -

git clone git@github.com:sanger-tol/genomeassembly.git
cd genomeassembly/
sed -i "s|/home/runner/work/genomeassembly/genomeassembly|${GENOMEASSEMBLY_TEST_DATA}|" assets/test_github.yaml
nextflow run main.nf -profile test_github,singularity --outdir ${OUTDIR} {OTHER ARGUMENTS}
nextflow run main.nf -profile test,singularity --outdir ${OUTDIR} {OTHER ARGUMENTS}
```

These command line steps will download and decompress the test data first, then download the pipeline and modify YAML so that it matches dataset location in your file system.
The last command line runs the test.
These command line steps will download the pipeline and run the test.

You should now be able to run the pipeline as you see fit.

Expand Down
4 changes: 2 additions & 2 deletions modules/local/generate_cram_csv.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ process GENERATE_CRAM_CSV {
'biocontainers/samtools:1.17--h00cdaf9_0' }"

input:
tuple val(meta), path(crampaths, stageAs: "?/*")
tuple val(meta), path(crampaths, stageAs: "?/*"), path(craipaths, stageAs: "?/*")


output:
Expand All @@ -23,7 +23,7 @@ process GENERATE_CRAM_CSV {
script:
def prefix = task.ext.prefix ?: "${meta.id}"
"""
generate_cram_csv.sh $crampaths >> ${prefix}_cram.csv
generate_cram_csv.sh $crampaths > ${prefix}_cram.csv
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
4 changes: 2 additions & 2 deletions modules/local/longranger/align/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ process LONGRANGER_ALIGN {

input:
tuple val(meta), path(reference)
path(fastqs)
path(fastqs, stageAs: "10X_inputs/*")

output:
tuple val(meta), path("${meta.id}/outs/possorted_bam.bam"), emit: bam
Expand All @@ -26,7 +26,7 @@ process LONGRANGER_ALIGN {
def args = task.ext.args ?: ''
def sample = "${meta.id}"
"""
longranger align --id=$sample --fastqs=$fastqs \
longranger align --id=$sample --fastqs=10X_inputs \
--sample=$sample --reference=$reference \
${args}
Expand Down

This file was deleted.

107 changes: 0 additions & 107 deletions modules/nf-core/busco/iyVesGerm1-insecta_odb10-busco/logs/busco.log

This file was deleted.

Loading

0 comments on commit db683a4

Please sign in to comment.