-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathhpv_config.yaml
106 lines (76 loc) · 3.06 KB
/
hpv_config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# Run settings for the universal / single-panel primer Snakemake pipeline.

# 1) Glob pattern matching the original (raw) BAM files.
raw_path: 'your/path/*bam'

# 2) Glob pattern matching the **REMAPPED** BAM files.
tmap_path: 'tmap/path/*bam'

# HPV types to include in the analysis, one quoted string per entry.
# Loads as a dictionary such as {'hpvtypes': ['16', '18', ...]}.
hpvtypes:
- '16'

# Are the reference genomes padded?
padding: true

# Amplicon panel type: single or universal.
panel: 'single'
# Gene names as they appear in the GTF.
genes:
- 'E1'
- 'E2'
- 'E4'
- 'E5'
- 'E6'
- 'E7'
- 'E8'
- 'L1'
- 'L2'

# NOTE(review): judging by the key name, these are presumably the HPV types
# that have no E5 gene - confirm against the Snakefile. The entries load as
# integers, whereas the hpvtypes entries are quoted strings; verify that the
# consumer compares like with like.
no_e5:
- 51
- 56
# 5) Cohort prefix, if any. Use null when there is none (this loads the same
#    as leaving the value blank).
cohort: null

# 6) Name of the deliverable directory.
#    e.g. deliver_proj: Type_Study_RD###_YearMonthDay
deliver_proj: 'project_name'
deliver_dir: '/path/project'
# 7) Update the parser in the Snakemake file to match the raw BAM filenames.
#    This hasn't changed in a long time, so this step can usually be ignored.
# 8) Update the wrapper.sh script.

# 9) Select the mapq threshold.
aq_filter: 4

# Coverage bins. Both values are quoted so the YAML loader passes them through
# verbatim as strings: an unquoted digits-and-colons scalar can be read as a
# base-60 number by YAML 1.1 loaders.
mosdepth_thresh: '4,20,100,1000'
mosdepth_quant: '0:20:50:100:200'
# 10) Fix the titles in coverage_Snakefile for the appropriate type.

# 11) Minimum read depth for the FASTA files.
min_read: 4

# 12) Cutoff for the percentage of Ns in the final FASTA.
fasta_n: 75

# 13) Maximum number of reads on the colorbar of the coverage clustermap.
cbar_max: 20

# 14) Phylogenetic tree parameters.
#     Make sure X forwarding is enabled for the ete3 toolkit!
ete_wf: 'none-none-none-fasttree_default'
####### Reference Files - Nothing below should be changed between runs!

# The directories holding these files also contain the index and dict files
# that TMAP needs.

# Reference WITH line breaks: does not break TVC, but cannot be used for the
# type_fasta rule.
hpv_reference: '/references/HPV_Ref_Alphas65_Pad400_hg19.fasta'

# Reference WITHOUT line breaks (note the .NOBREAKS suffix): breaks TVC, but
# can be used for the type_fasta rule.
hpv_ref_nobreak: '/references/HPV_Ref_Alphas65_Pad400_hg19.fasta.NOBREAKS'

# Amplicon BEDs are linked here with the format: HPV16.amplicon.bed
# Alternative path, left commented out:
#amplicon_bed: /references/HPV/amplicon_bed/HPV%s.amplicon.bed
amplicon_bed: '/references/WG00038.1_HPV%s_FFPE.Designed.bed'
universal_bed: '/references/HPVRef_13Types_Ref_Mapped_hg19_update_HPV16.unmerged-detail.bed'

# These BED files span the full genome, from 0..8000 (or whatever the genome
# length is).
len_bed: '/references/hpv%s.bed'
lineage: '/references/HPV%s_lineages.fasta'
# Tools and packages
vc_bin: '/references/HPV/tvc-5.0.3/bin'
vc_pipe: '/references/HPV/tvc-5.0.3/bin/variant_caller_pipeline.py'
vc_param: '/references/HPV/germline_low_stringency_no_het_low_depth.json'
cov_dev: '/references/HPV/cgrCoverageDev'
glu: '/references/HPV/glu'

# snpEff
# Any new HPV type must be added to both the hpv_universal database and the
# annotations txt file below.
snpeff: '/references/snpEff'
snpeff_db: 'hpv_universal'

# Annotations file; has extra regions such as splice, URR, etc.
snpeff_bed: '/references/HPV/universal_annotations_20170106.txt'

# This GTF does not have the CDS, start, stop, etc. regions.
genes_gtf: '/references/snpEff/data/hpv_universal/genes.gtf'

# The GTF above converted to BED format - also without CDS, etc.
genes_bed: '/references/HPV/universal_genes.bed'