Skip to content

Commit

Permalink
Add plink param for ESCO4Cancer/GDI demo (galaxyproject#5624)
Browse files Browse the repository at this point in the history
* add new plink mode

* add new score files

* switch to tabular

* update test data

* add new test file

* change filenames

* Update tools/plink/plink.xml

Co-authored-by: Nicola Soranzo <[email protected]>

---------

Co-authored-by: Nicola Soranzo <[email protected]>
  • Loading branch information
bgruening and nsoranzo authored Nov 14, 2023
1 parent a01d758 commit 6d29c05
Show file tree
Hide file tree
Showing 3 changed files with 274 additions and 18 deletions.
81 changes: 63 additions & 18 deletions tools/plink/plink.xml
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
<tool id='plink' name='plink' version='@TOOL_VERSION@+galaxy@VERSION_SUFFIX@'>
<description>genome association analysis toolset</description>
<macros>
<token name='@TOOL_VERSION@'>1.90b6.21</token>
<token name='@VERSION_SUFFIX@'>0</token>
<token name='@VERSION_SUFFIX@'>1</token>
<xml name='template_sanitizer'>
<sanitizer>
<valid initial='default'>
Expand Down Expand Up @@ -50,6 +51,7 @@
</xrefs>
<requirements>
<!-- plink itself; the version is taken from the @TOOL_VERSION@ token -->
<requirement type='package' version='@TOOL_VERSION@'>plink</requirement>
<!-- awk is called in the command section to rewrite plink's scoring .profile output as tab-separated text -->
<requirement type='package' version='5.3.0'>gawk</requirement>
</requirements>
<command detect_errors='exit_code'><![CDATA[
Expand All @@ -62,17 +64,17 @@
#if $functions.func == 'data_manage':
#if $functions.bmerge.set == 'Yes':
&& mkdir bmerge_files
&& ln -s '$functions.bmerge.bed.extra_files_path'/RgeneticsData.bed bmerge_files/bmerge_input.bed
&& ln -s '$functions.bmerge.bed.extra_files_path'/RgeneticsData.bim bmerge_files/bmerge_input.bim
&& ln -s '$functions.bmerge.bed.extra_files_path'/RgeneticsData.fam bmerge_files/bmerge_input.fam
&& ln -s '$functions.bmerge.bed.extra_files_path/RgeneticsData.bed' 'bmerge_files/bmerge_input.bed'
&& ln -s '$functions.bmerge.bed.extra_files_path/RgeneticsData.bim' 'bmerge_files/bmerge_input.bim'
&& ln -s '$functions.bmerge.bed.extra_files_path/RgeneticsData.fam' 'bmerge_files/bmerge_input.fam'
#end if
#end if
#if $inputs.inputs.filetype == 'bfile':
&& ln -s '$inputs.inputs.bed.extra_files_path'/RgeneticsData.bed plink_input/plink_input.bed
&& ln -s '$inputs.inputs.bed.extra_files_path'/RgeneticsData.bim plink_input/plink_input.bim
&& ln -s '$inputs.inputs.bed.extra_files_path'/RgeneticsData.fam plink_input/plink_input.fam
&& ln -s '$inputs.inputs.bed.extra_files_path/RgeneticsData.bed' 'plink_input/plink_input.bed'
&& ln -s '$inputs.inputs.bed.extra_files_path/RgeneticsData.bim' 'plink_input/plink_input.bim'
&& ln -s '$inputs.inputs.bed.extra_files_path/RgeneticsData.fam' 'plink_input/plink_input.fam'
&& plink --bfile plink_input/plink_input
#elif $inputs.inputs.filetype == 'vcf':
#if $inputs.inputs.input.is_of_type('bcf'):
Expand All @@ -84,7 +86,7 @@
## Plink commands by section
#if $inputs.covar_input:
--covar '$inputs.covar_input'
--covar '$inputs.covar_input'
#end if
#if $inputs.set_pheno.set_pheno == 'Yes':
--pheno $inputs.set_pheno.pheno $inputs.set_pheno.all_pheno
Expand Down Expand Up @@ -232,7 +234,6 @@
#end if
#elif $functions.func == 'data_manage':
#if $functions.bmerge.set == 'Yes':
--bmerge bmerge_files/bmerge_input
#end if
Expand Down Expand Up @@ -285,8 +286,6 @@
#end if
#end if
#end if
#elif $functions.func == 'link':
#if $functions.set_indep.choice == 'Yes':
--indep-pairwise $functions.set_indep.window $functions.set_indep.step $functions.set_indep.r2
Expand Down Expand Up @@ -382,8 +381,8 @@
#end if
#end if
## #elif $functions.func == 'scoring':
##
#elif $functions.func == 'scoring':
--score '$functions.score_file' $functions.variant_id_i $functions.allel_codes_j $functions.scores_k $functions.header $functions.sum
## #else:
## --rerun $functions.logfile
##
Expand All @@ -410,9 +409,12 @@
#end if
#end if
&& mkdir '$plink_out.extra_files_path'
&& cp plink_output/plink_output.bed '$plink_out.extra_files_path'/RgeneticsData.bed
&& cp plink_output/plink_output.bim '$plink_out.extra_files_path'/RgeneticsData.bim
&& cp plink_output/plink_output.fam '$plink_out.extra_files_path'/RgeneticsData.fam
&& cp plink_output/plink_output.bed '$plink_out.extra_files_path/RgeneticsData.bed'
&& cp plink_output/plink_output.bim '$plink_out.extra_files_path/RgeneticsData.bim'
&& cp plink_output/plink_output.fam '$plink_out.extra_files_path/RgeneticsData.fam'
#if $functions.func == 'scoring':
&& awk -v OFS="\t" '{$1=$1; print}' plink_output/plink_output.profile > plink_output/plink_output.profile.tab
#end if
]]></command>
<inputs>
<section name='inputs' title='Data inputs' expanded='true'>
Expand Down Expand Up @@ -450,6 +452,7 @@
<option value='stratification'>Population stratification</option>
<option value='association'>Association analysis</option>
<option value='ibd'>Identity-by-descent</option>
<option value='scoring'>Apply a linear scoring system (--score)</option>
<!-- <option value='rerun'>Rerun</option> -->
</param>
<when value='filtering'>
Expand Down Expand Up @@ -970,8 +973,20 @@
</conditional>
<param name='lambda' type='float' label='Lambda value' optional='true'/>
</when>
<!-- <when value='scoring'>
</when> -->
<when value='scoring'>
    <!-- Implements part of plink's -\-score option. The command template passes
         these values after the flag in the order: score file, variant ID column,
         allele column, score column, header modifier, sum modifier. -->
    <param name='score_file' type='data' format='tabular' label='Scoring system which should be applied to all samples'/>
    <param name='variant_id_i' type='data_column' data_ref='score_file' label='Variant ID column'/>
    <!-- The parameter name keeps its original 'allel' spelling because the command
         template and the tool tests refer to it; only the visible label is corrected. -->
    <param name='allel_codes_j' type='data_column' data_ref='score_file' label='Allele column'/>
    <param name='scores_k' type='data_column' data_ref='score_file' label='Score column'/>
    <!-- Yields the literal modifier 'header' when checked and an empty string otherwise. -->
    <param name='header' type='boolean' truevalue='header' falsevalue='' checked='false' label='Is a header line present in the scores file?'/>

    <!-- Each option value is the literal modifier handed to plink; the empty value adds nothing. -->
    <param name='sum' type='select' label='How to combine the valid per-variant scores?' help='Average is the default, but it can be changed to sum.'>
        <option value=''>Final scores are averages of the valid per-variant scores</option>
        <option value='sum'>Report sums (sum; cannot be used with 'no-mean-imputation'; is automatically on with dosage data)</option>
        <option value='no-sum'>Disable sum (no-sum)</option>
    </param>
</when>
<when value='ibd'>
<conditional name='genome'>
<param name='output_genome' type='select' help='Perform and return results of IBS/IBD computation'>
Expand Down Expand Up @@ -1060,6 +1075,11 @@
<filter>functions['func'] == 'ibd' and functions['genome']['output_genome']</filter>
</data>

<!-- Scoring: tab-separated copy of the .profile file, collected only when func is 'scoring' -->
<data name="scores"
      format="tabular"
      label="${tool.name}: Scores"
      from_work_dir="plink_output/plink_output.profile.tab">
    <filter>functions['func'] == 'scoring'</filter>
</data>

<!--Stratification-->
<data name='mds' format='txt' from_work_dir='plink_output/plink_output.mds' label='${tool.name}: MDS'>
<filter>functions['func'] == 'stratification' and functions['cluster']['cluster'] == 'Yes' and functions['cluster']['mds']['mds_scaling'] == 'Yes'</filter>
Expand Down Expand Up @@ -1203,6 +1223,7 @@
<expand macro="log_out_assert"/>
</test>


<test expect_num_outputs='8'>
<section name='inputs'>
<conditional name='inputs'>
Expand Down Expand Up @@ -1393,6 +1414,30 @@
<expand macro="log_out_assert"/>
<output name='genome' file='out.genome'/>
</test>
<!-- Scoring mode: binary fileset input, score file with a header line, summed scores -->
<test expect_num_outputs="3">
    <section name="inputs">
        <conditional name="inputs">
            <param name="filetype" value="bfile"/>
            <!-- composite pbed dataset assembled from the three plink test files -->
            <param name="bed" value="" ftype="pbed">
                <composite_data value="plink.bim"/>
                <composite_data value="plink.bed"/>
                <composite_data value="plink.fam"/>
            </param>
        </conditional>
    </section>
    <conditional name="functions">
        <param name="func" value="scoring"/>
        <param name="score_file" value="scores_file.tab"/>
        <!-- columns 1, 2 and 3 of the score file: variant ID, allele, score -->
        <param name="variant_id_i" value="1"/>
        <param name="allel_codes_j" value="2"/>
        <param name="scores_k" value="3"/>
        <param name="header" value="true"/>
        <param name="sum" value="sum"/>
    </conditional>
    <expand macro="log_out_assert"/>
    <output name="scores" file="scores_results.tab"/>
</test>
</tests>
<help><![CDATA[
PLINK is a free, open-source whole genome association analysis toolset, designed to perform a range of basic, large-scale analyses in a computationally efficient manner.
Expand Down
10 changes: 10 additions & 0 deletions tools/plink/test-data/scores_file.tab
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
variant effect_allele beta
snp1 T -0.078507
snp2 A 0.16488
snp3 C -0.043171
snp18 C -0.37606
snp19 T -0.039295
snp20 A 0.033082
snp21 T 0.13057
snp22 C 0.02329
snp23 G 0.027248
201 changes: 201 additions & 0 deletions tools/plink/test-data/scores_results.tab
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
FID IID PHENO CNT CNT2 SCORESUM
per0 per0 1 4 0 0
per1 per1 2 4 2 0.066164
per2 per2 1 4 3 0.231044
per3 per3 1 4 2 0.197962
per4 per4 1 4 2 0.197962
per5 per5 1 4 1 0.16488
per6 per6 2 4 2 0.32976
per7 per7 2 4 2 0.197962
per8 per8 1 4 2 0.197962
per9 per9 1 4 2 0.197962
per10 per10 1 4 2 0.197962
per11 per11 2 4 1 0.16488
per12 per12 1 4 0 0
per13 per13 2 4 3 0.362842
per14 per14 2 4 3 0.231044
per15 per15 1 4 1 0.16488
per16 per16 2 4 2 0.197962
per17 per17 2 4 1 0.16488
per18 per18 1 4 2 0.197962
per19 per19 2 4 3 0.231044
per20 per20 2 4 3 0.362842
per21 per21 2 4 2 0.197962
per22 per22 2 4 1 0.033082
per23 per23 2 4 0 0
per24 per24 1 4 2 0.32976
per25 per25 1 4 4 0.395924
per26 per26 2 4 1 0.033082
per27 per27 2 4 1 0.16488
per28 per28 2 4 1 0.033082
per29 per29 2 4 3 0.231044
per30 per30 2 4 2 0.197962
per31 per31 2 4 2 0.197962
per32 per32 2 4 0 0
per33 per33 1 4 2 0.32976
per34 per34 1 4 2 0.197962
per35 per35 1 4 4 0.395924
per36 per36 2 4 1 0.033082
per37 per37 2 4 3 0.231044
per38 per38 2 4 1 0.033082
per39 per39 1 4 2 0.197962
per40 per40 1 4 2 0.197962
per41 per41 1 4 2 0.197962
per42 per42 2 4 2 0.197962
per43 per43 2 4 2 0.32976
per44 per44 2 4 4 0.395924
per45 per45 2 4 4 0.395924
per46 per46 2 4 2 0.066164
per47 per47 2 4 2 0.197962
per48 per48 1 4 3 0.231044
per49 per49 1 4 4 0.395924
per50 per50 2 4 3 0.231044
per51 per51 1 4 2 0.197962
per52 per52 2 4 1 0.033082
per53 per53 1 4 3 0.362842
per54 per54 2 4 1 0.033082
per55 per55 1 4 2 0.32976
per56 per56 2 4 1 0.033082
per57 per57 2 4 2 0.32976
per58 per58 2 4 2 0.066164
per59 per59 1 4 2 0.197962
per60 per60 2 4 2 0.32976
per61 per61 2 4 1 0.16488
per62 per62 2 4 1 0.033082
per63 per63 1 4 2 0.197962
per64 per64 1 4 3 0.231044
per65 per65 2 4 3 0.362842
per66 per66 2 4 4 0.395924
per67 per67 1 4 2 0.066164
per68 per68 2 4 3 0.231044
per69 per69 1 4 1 0.16488
per70 per70 2 4 2 0.197962
per71 per71 2 4 2 0.197962
per72 per72 2 4 2 0.32976
per73 per73 1 4 1 0.033082
per74 per74 2 4 1 0.16488
per75 per75 1 4 2 0.066164
per76 per76 2 4 1 0.16488
per77 per77 2 4 3 0.231044
per78 per78 1 4 2 0.066164
per79 per79 2 4 3 0.362842
per80 per80 1 4 3 0.362842
per81 per81 2 4 3 0.231044
per82 per82 1 4 2 0.197962
per83 per83 1 4 3 0.362842
per84 per84 1 4 2 0.197962
per85 per85 2 4 0 0
per86 per86 1 4 3 0.362842
per87 per87 2 4 1 0.033082
per88 per88 1 4 2 0.32976
per89 per89 2 4 2 0.32976
per90 per90 2 4 1 0.16488
per91 per91 1 4 2 0.197962
per92 per92 2 4 3 0.231044
per93 per93 1 4 3 0.231044
per94 per94 2 4 1 0.16488
per95 per95 2 4 3 0.362842
per96 per96 2 4 3 0.362842
per97 per97 1 4 2 0.197962
per98 per98 2 4 1 0.16488
per99 per99 2 4 0 0
per100 per100 2 4 1 0.033082
per101 per101 2 4 1 0.16488
per102 per102 1 4 0 0
per103 per103 1 4 2 0.197962
per104 per104 1 4 1 0.16488
per105 per105 2 4 1 0.033082
per106 per106 2 4 2 0.197962
per107 per107 2 4 1 0.16488
per108 per108 2 4 2 0.32976
per109 per109 1 4 3 0.362842
per110 per110 1 4 2 0.066164
per111 per111 1 4 2 0.197962
per112 per112 2 4 4 0.395924
per113 per113 1 4 0 0
per114 per114 2 4 3 0.231044
per115 per115 1 4 2 0.197962
per116 per116 1 4 1 0.16488
per117 per117 2 4 2 0.197962
per118 per118 2 4 2 0.066164
per119 per119 2 4 0 0
per120 per120 2 4 1 0.16488
per121 per121 1 4 3 0.231044
per122 per122 2 4 0 0
per123 per123 1 4 4 0.395924
per124 per124 1 4 2 0.197962
per125 per125 1 4 3 0.362842
per126 per126 2 4 2 0.066164
per127 per127 1 4 2 0.066164
per128 per128 2 4 1 0.16488
per129 per129 2 4 2 0.197962
per130 per130 2 4 1 0.16488
per131 per131 2 4 1 0.033082
per132 per132 2 4 3 0.362842
per133 per133 2 4 2 0.066164
per134 per134 1 4 2 0.197962
per135 per135 1 4 1 0.033082
per136 per136 1 4 0 0
per137 per137 1 4 3 0.362842
per138 per138 1 4 2 0.32976
per139 per139 1 4 1 0.033082
per140 per140 1 4 0 0
per141 per141 1 4 3 0.362842
per142 per142 1 4 1 0.16488
per143 per143 1 4 2 0.197962
per144 per144 2 4 2 0.066164
per145 per145 2 4 2 0.197962
per146 per146 1 4 3 0.362842
per147 per147 2 4 2 0.197962
per148 per148 2 4 2 0.197962
per149 per149 2 4 3 0.362842
per150 per150 1 4 2 0.066164
per151 per151 2 4 1 0.033082
per152 per152 1 4 1 0.16488
per153 per153 2 4 3 0.231044
per154 per154 2 4 1 0.16488
per155 per155 1 4 3 0.231044
per156 per156 1 4 2 0.32976
per157 per157 1 4 3 0.362842
per158 per158 2 4 1 0.16488
per159 per159 1 4 2 0.066164
per160 per160 2 4 3 0.231044
per161 per161 1 4 2 0.197962
per162 per162 2 4 2 0.197962
per163 per163 1 4 2 0.197962
per164 per164 2 4 3 0.231044
per165 per165 2 4 4 0.395924
per166 per166 1 4 2 0.197962
per167 per167 1 4 2 0.197962
per168 per168 1 4 2 0.197962
per169 per169 1 4 1 0.033082
per170 per170 2 4 1 0.16488
per171 per171 1 4 1 0.033082
per172 per172 1 4 3 0.231044
per173 per173 1 4 3 0.362842
per174 per174 1 4 1 0.033082
per175 per175 2 4 2 0.197962
per176 per176 2 4 0 0
per177 per177 2 4 2 0.197962
per178 per178 1 4 1 0.033082
per179 per179 1 4 2 0.197962
per180 per180 1 4 2 0.066164
per181 per181 1 4 1 0.033082
per182 per182 1 4 3 0.362842
per183 per183 1 4 4 0.395924
per184 per184 1 4 3 0.362842
per185 per185 1 4 2 0.197962
per186 per186 2 4 3 0.231044
per187 per187 1 4 1 0.16488
per188 per188 1 4 3 0.231044
per189 per189 2 4 2 0.197962
per190 per190 1 4 1 0.033082
per191 per191 1 4 4 0.395924
per192 per192 1 4 2 0.197962
per193 per193 2 4 1 0.033082
per194 per194 2 4 2 0.197962
per195 per195 1 4 2 0.32976
per196 per196 1 4 2 0.197962
per197 per197 1 4 3 0.231044
per198 per198 1 4 3 0.231044
per199 per199 2 4 4 0.395924

0 comments on commit 6d29c05

Please sign in to comment.