Skip to content

Commit

Permalink
Merge pull request #122 from jolespin/devel
Browse files Browse the repository at this point in the history
Fixed readme
  • Loading branch information
jolespin authored Sep 22, 2024
2 parents 2a504ae + a46659c commit 2848a37
Show file tree
Hide file tree
Showing 7 changed files with 32 additions and 22 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,7 @@ ________________________________________________________________
<details>
<summary> <b>Daily Change Log:</b> </summary>

* [2024.8.30] - Added `${N_JOBS}` to download scripts with default set to maximum threads available
* [2024.8.29] - Added `VERSION` file created in `download_databases.sh`
* [2024.7.11] - Alignment fraction threshold for genome clustering only applied to reference but should also apply to query. Added `--af_mode` with either `relaxed = max([Alignment_fraction_ref, Alignment_fraction_query]) > minimum_af` or `strict = (Alignment_fraction_ref > minimum_af) & (Alignment_fraction_query > minimum_af)` to `edgelist_to_clusters.py`, `global_clustering.py`, `local_clustering.py`, and `cluster.py`.
* [2024.7.3] - Added `pigz` to `VEBA-annotate_env` which isn't a problem with most `conda` installations but needed for `docker` containers.
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ ___________________________________________________________________

### Announcements

* **Current Stable Version:** [`v2.2.1`](https://github.com/jolespin/veba/releases/tag/v2.2.0)
* **Current Stable Version:** [`v2.2.1`](https://github.com/jolespin/veba/releases/tag/v2.2.1)

* **Current Database Version:** [`VDB_v7`](install/DATABASE.md)

Expand Down
15 changes: 8 additions & 7 deletions install/download_databases-annotate.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# __version__ = "2024.6.8"
# __version__ = "2024.8.30"
# VEBA_DATABASE_VERSION = "VDB_v7"
# MICROEUKAYROTIC_DATABASE_VERSION = "MicroEuk_v3"
# usage: bash veba/download_databases-annotate.sh /path/to/veba_database_destination/
Expand All @@ -9,7 +9,8 @@ DATABASE_DIRECTORY=${1:-"."}
REALPATH_DATABASE_DIRECTORY=$(realpath $DATABASE_DIRECTORY)
SCRIPT_DIRECTORY=$(dirname "$0")

# N_JOBS=$(2:-"1")
# Number of threads: optional 2nd positional argument. download_databases.sh
# invokes this script as `<script> ${DATABASE_DIRECTORY} ${N_JOBS}`, so the
# thread count arrives as $2 (reading $3 silently ignored the caller's value).
# Defaults to the CPU count reported by Python's multiprocessing module.
MAXIMUM_NUMBER_OF_CPU=$(python -c "from multiprocessing import cpu_count; print(cpu_count())")
N_JOBS=${2:-${MAXIMUM_NUMBER_OF_CPU}}

# Database structure
echo ". .. ... ..... ........ ............."
Expand Down Expand Up @@ -78,19 +79,19 @@ mkdir -v -p ${DATABASE_DIRECTORY}/Annotate/UniRef

wget -v -P ${DATABASE_DIRECTORY}/Annotate/UniRef/ https://ftp.uniprot.org/pub/databases/uniprot/current_release/uniref/uniref90/uniref90.release_note
wget -v -P ${DATABASE_DIRECTORY} https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz
diamond makedb --in ${DATABASE_DIRECTORY}/uniref90.fasta.gz --db ${DATABASE_DIRECTORY}/Annotate/UniRef/uniref90.dmnd
diamond makedb --in ${DATABASE_DIRECTORY}/uniref90.fasta.gz --db ${DATABASE_DIRECTORY}/Annotate/UniRef/uniref90.dmnd --threads ${N_JOBS}
rm -rf ${DATABASE_DIRECTORY}/uniref90.fasta.gz

wget -v -P ${DATABASE_DIRECTORY}/Annotate/UniRef/ https://ftp.uniprot.org/pub/databases/uniprot/current_release/uniref/uniref50/uniref50.release_note
wget -v -P ${DATABASE_DIRECTORY} https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref50/uniref50.fasta.gz
diamond makedb --in ${DATABASE_DIRECTORY}/uniref50.fasta.gz --db ${DATABASE_DIRECTORY}/Annotate/UniRef/uniref50.dmnd
diamond makedb --in ${DATABASE_DIRECTORY}/uniref50.fasta.gz --db ${DATABASE_DIRECTORY}/Annotate/UniRef/uniref50.dmnd --threads ${N_JOBS}
rm -rf ${DATABASE_DIRECTORY}/uniref50.fasta.gz

# MIBiG
mkdir -v -p ${DATABASE_DIRECTORY}/Annotate/MIBiG
wget -v -P ${DATABASE_DIRECTORY} https://dl.secondarymetabolites.org/mibig/mibig_prot_seqs_3.1.fasta
seqkit rmdup -s ${DATABASE_DIRECTORY}/mibig_prot_seqs_3.1.fasta > ${DATABASE_DIRECTORY}/mibig_prot_seqs_3.1.rmdup.fasta
diamond makedb --in ${DATABASE_DIRECTORY}/mibig_prot_seqs_3.1.rmdup.fasta --db ${DATABASE_DIRECTORY}/Annotate/MIBiG/mibig_v3.1.dmnd
diamond makedb --in ${DATABASE_DIRECTORY}/mibig_prot_seqs_3.1.rmdup.fasta --db ${DATABASE_DIRECTORY}/Annotate/MIBiG/mibig_v3.1.dmnd --threads ${N_JOBS}
rm -rf ${DATABASE_DIRECTORY}/mibig_prot_seqs_3.1.fasta
rm -rf ${DATABASE_DIRECTORY}/mibig_prot_seqs_3.1.rmdup.fasta

Expand All @@ -104,13 +105,13 @@ rm -rf ${DATABASE_DIRECTORY}/mibig_prot_seqs_3.1.rmdup.fasta
mkdir -v -p ${DATABASE_DIRECTORY}/Annotate/VFDB
wget -v -P ${DATABASE_DIRECTORY} http://www.mgc.ac.cn/VFs/Down/VFDB_setA_pro.fas.gz
wget -v -P ${DATABASE_DIRECTORY}/Annotate/VFDB/ http://www.mgc.ac.cn/VFs/Down/VFs.xls.gz
diamond makedb --in ${DATABASE_DIRECTORY}/VFDB_setA_pro.fas.gz --db ${DATABASE_DIRECTORY}/Annotate/VFDB/VFDB_setA_pro.dmnd
diamond makedb --in ${DATABASE_DIRECTORY}/VFDB_setA_pro.fas.gz --db ${DATABASE_DIRECTORY}/Annotate/VFDB/VFDB_setA_pro.dmnd --threads ${N_JOBS}
rm -rf ${DATABASE_DIRECTORY}/VFDB_setA_pro.fas.gz

# CAZy
mkdir -v -p ${DATABASE_DIRECTORY}/Annotate/CAZy
wget -v -P ${DATABASE_DIRECTORY} https://bcb.unl.edu/dbCAN2/download/CAZyDB.07262023.fa
diamond makedb --in ${DATABASE_DIRECTORY}/CAZyDB.07262023.fa --db ${DATABASE_DIRECTORY}/Annotate/CAZy/CAZyDB.07262023.dmnd
diamond makedb --in ${DATABASE_DIRECTORY}/CAZyDB.07262023.fa --db ${DATABASE_DIRECTORY}/Annotate/CAZy/CAZyDB.07262023.dmnd --threads ${N_JOBS}
rm -rf ${DATABASE_DIRECTORY}/CAZyDB.07262023.fa


Expand Down
7 changes: 4 additions & 3 deletions install/download_databases-classify.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# __version__ = "2024.6.8.1"
# __version__ = "2024.8.30"
# VEBA_DATABASE_VERSION = "VDB_v7"
# MICROEUKAYROTIC_DATABASE_VERSION = "MicroEuk_v3"
# usage: bash veba/download_databases-classify.sh /path/to/veba_database_destination/
Expand All @@ -9,7 +9,8 @@ DATABASE_DIRECTORY=${1:-"."}
REALPATH_DATABASE_DIRECTORY=$(realpath $DATABASE_DIRECTORY)
SCRIPT_DIRECTORY=$(dirname "$0")

# N_JOBS=$(2:-"1")
# Number of threads: optional 2nd positional argument. download_databases.sh
# invokes this script as `<script> ${DATABASE_DIRECTORY} ${N_JOBS}`, so the
# thread count arrives as $2 (reading $3 silently ignored the caller's value).
# Defaults to the CPU count reported by Python's multiprocessing module.
MAXIMUM_NUMBER_OF_CPU=$(python -c "from multiprocessing import cpu_count; print(cpu_count())")
N_JOBS=${2:-${MAXIMUM_NUMBER_OF_CPU}}

# Database structure
echo ". .. ... ..... ........ ............."
Expand Down Expand Up @@ -81,7 +82,7 @@ wget -v -P ${DATABASE_DIRECTORY} https://portal.nersc.gov/CheckV/checkv-db-${CHE
tar xvzf ${DATABASE_DIRECTORY}/checkv-db-${CHECKVDB_VERSION}.tar.gz -C ${DATABASE_DIRECTORY}
mv ${DATABASE_DIRECTORY}/checkv-db-${CHECKVDB_VERSION} ${DATABASE_DIRECTORY}/Classify/CheckV
# Record the CheckV database release next to the extracted files. The release
# string is held in CHECKVDB_VERSION (the variable used by the download,
# extract and cleanup commands around this line).
echo "${CHECKVDB_VERSION}" > ${DATABASE_DIRECTORY}/Classify/CheckV/database_version
diamond makedb --in ${DATABASE_DIRECTORY}/Classify/CheckV/genome_db/checkv_reps.faa --db ${DATABASE_DIRECTORY}/Classify/CheckV/genome_db/checkv_reps.dmnd
diamond makedb --in ${DATABASE_DIRECTORY}/Classify/CheckV/genome_db/checkv_reps.faa --db ${DATABASE_DIRECTORY}/Classify/CheckV/genome_db/checkv_reps.dmnd --threads ${N_JOBS}
rm -rf ${DATABASE_DIRECTORY}/checkv-db-${CHECKVDB_VERSION}.tar.gz

# geNomad
Expand Down
5 changes: 3 additions & 2 deletions install/download_databases-contamination.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# __version__ = "2024.6.8"
# __version__ = "2024.8.30"
# VEBA_DATABASE_VERSION = "VDB_v7"
# MICROEUKAYROTIC_DATABASE_VERSION = "MicroEuk_v3"
# usage: bash veba/download_databases-contamination.sh /path/to/veba_database_destination/
Expand All @@ -9,7 +9,8 @@ DATABASE_DIRECTORY=${1:-"."}
REALPATH_DATABASE_DIRECTORY=$(realpath $DATABASE_DIRECTORY)
SCRIPT_DIRECTORY=$(dirname "$0")

# N_JOBS=$(2:-"1")
# Number of threads: optional 2nd positional argument. download_databases.sh
# invokes this script as `<script> ${DATABASE_DIRECTORY} ${N_JOBS}`, so the
# thread count arrives as $2 (reading $3 silently ignored the caller's value).
# Defaults to the CPU count reported by Python's multiprocessing module.
MAXIMUM_NUMBER_OF_CPU=$(python -c "from multiprocessing import cpu_count; print(cpu_count())")
N_JOBS=${2:-${MAXIMUM_NUMBER_OF_CPU}}

# Database structure
echo ". .. ... ..... ........ ............."
Expand Down
5 changes: 3 additions & 2 deletions install/download_databases-markers.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# __version__ = "2024.6.8"
# __version__ = "2024.8.30"
# VEBA_DATABASE_VERSION = "VDB_v8"
# MICROEUKAYROTIC_DATABASE_VERSION = "MicroEuk_v3"
# usage: bash veba/download_databases-markers.sh /path/to/veba_database_destination/
Expand All @@ -9,7 +9,8 @@ DATABASE_DIRECTORY=${1:-"."}
REALPATH_DATABASE_DIRECTORY=$(realpath $DATABASE_DIRECTORY)
SCRIPT_DIRECTORY=$(dirname "$0")

# N_JOBS=$(2:-"1")
# Number of threads: optional 2nd positional argument. download_databases.sh
# invokes this script as `<script> ${DATABASE_DIRECTORY} ${N_JOBS}`, so the
# thread count arrives as $2 (reading $3 silently ignored the caller's value).
# Defaults to the CPU count reported by Python's multiprocessing module.
MAXIMUM_NUMBER_OF_CPU=$(python -c "from multiprocessing import cpu_count; print(cpu_count())")
N_JOBS=${2:-${MAXIMUM_NUMBER_OF_CPU}}

# Database structure
echo ". .. ... ..... ........ ............."
Expand Down
19 changes: 12 additions & 7 deletions install/download_databases.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/bash
# __version__ = "2024.8.29"
# __version__ = "2024.8.30"
# MICROEUKAYROTIC_DATABASE_VERSION = "MicroEuk_v3"
# usage: bash veba/download_databases.sh /path/to/veba_database_destination/ [optional positional argument: /path/to/conda_environments/]
# usage: bash veba/download_databases.sh /path/to/veba_database_destination/ [optional positional argument: /path/to/conda_environments/ number_of_threads]
# Version
VEBA_DATABASE_VERSION="VDB_v7"

Expand All @@ -12,7 +12,12 @@ SCRIPT_DIRECTORY=$(dirname "$0")

CONDA_ENVS_PATH=${2:-"$(conda info --base)/envs/"}

# N_JOBS=$(2:-"1")
MAXIMUM_NUMBER_OF_CPU=$(python -c "from multiprocessing import cpu_count; print(cpu_count())")
N_JOBS=${3:-${MAXIMUM_NUMBER_OF_CPU}}
echo ". .. ... ..... ........ ............."
echo "Detected ${MAXIMUM_NUMBER_OF_CPU} available threads"
echo "Using ${N_JOBS} threads"
echo ". .. ... ..... ........ ............."

# Database structure
echo ". .. ... ..... ........ ............."
Expand All @@ -33,24 +38,24 @@ echo $VEBA_DATABASE_VERSION > ${DATABASE_DIRECTORY}/VERSION
echo ". .. ... ..... ........ ............."
echo "Downloading and configuring database (markers)"
echo ". .. ... ..... ........ ............."
bash ${SCRIPT_DIRECTORY}/download_databases-markers.sh ${DATABASE_DIRECTORY} | grep -v "\[partial-database\]"
bash ${SCRIPT_DIRECTORY}/download_databases-markers.sh ${DATABASE_DIRECTORY} ${N_JOBS} | grep -v "\[partial-database\]"

echo ". .. ... ..... ........ ............."
echo "Downloading and configuring database (contamination)"
echo ". .. ... ..... ........ ............."
bash ${SCRIPT_DIRECTORY}/download_databases-contamination.sh ${DATABASE_DIRECTORY} | grep -v "\[partial-database\]"
bash ${SCRIPT_DIRECTORY}/download_databases-contamination.sh ${DATABASE_DIRECTORY} ${N_JOBS} | grep -v "\[partial-database\]"

echo ". .. ... ..... ........ ............."
echo "Downloading and configuring database (classify)"
echo ". .. ... ..... ........ ............."
echo "This might take a while depending on source database i/o speed..."
bash ${SCRIPT_DIRECTORY}/download_databases-classify.sh ${DATABASE_DIRECTORY} | grep -v "\[partial-database\]"
bash ${SCRIPT_DIRECTORY}/download_databases-classify.sh ${DATABASE_DIRECTORY} ${N_JOBS} | grep -v "\[partial-database\]"

echo ". .. ... ..... ........ ............."
echo "Downloading and configuring database (annotate)"
echo ". .. ... ..... ........ ............."
echo "This might take a while depending on source database i/o speed..."
bash ${SCRIPT_DIRECTORY}/download_databases-annotate.sh ${DATABASE_DIRECTORY} | grep -v "\[partial-database\]"
bash ${SCRIPT_DIRECTORY}/download_databases-annotate.sh ${DATABASE_DIRECTORY} ${N_JOBS} | grep -v "\[partial-database\]"

# Environment variables
echo ". .. ... ..... ........ ............."
Expand Down

0 comments on commit 2848a37

Please sign in to comment.