Skip to content

Commit

Permalink
Set ascp explicitly
Browse files Browse the repository at this point in the history
  • Loading branch information
arkid15r committed Sep 26, 2023
1 parent 55cb289 commit 8001ba6
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 14 deletions.
4 changes: 2 additions & 2 deletions workers/data_refinery_workers/downloaders/geo.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def _download_file_aspera(
downloader_job=downloader_job.id,
)

ascp = "ascp"
ascp_path = "/home/user/.aspera/ascli/sdk/ascp"
key = "keys/asperaweb_id_dsa.openssh"
url = download_url
user = "anonftp"
Expand All @@ -84,7 +84,7 @@ def _download_file_aspera(

# Resume level 1 (-k1), encryption disabled (-T), unlimited speed
command_str = "{} -i {} -k1 -T {}@{}:{} {}".format(
ascp, key, user, ftp, url, target_file_path
ascp_path, key, user, ftp, url, target_file_path
)
formatted_command = command_str.format(src=download_url, dest=target_file_path)
completed_command = subprocess.run(
Expand Down
31 changes: 24 additions & 7 deletions workers/data_refinery_workers/downloaders/sra.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,14 @@ def _download_file_aspera(
# aspera.sra.ebi.ac.uk uses port 33001 for SSH communication
# We are also NOT using encryption (-T) to avoid slowdown,
# and we are not using any kind of rate limiting.
command_str = "ascp -QT -l 300m -P 33001 -i keys/asperaweb_id_dsa.openssh {src} {dest}"
formatted_command = command_str.format(src=download_url, dest=target_file_path)
command_str = (
"{ascp_path} -QT -l 300m -P 33001 -i keys/asperaweb_id_dsa.openssh {src} {dest}"
)
formatted_command = command_str.format(
ascp_path="/home/user/.aspera/ascli/sdk/ascp",
src=download_url,
dest=target_file_path,
)
logger.info("Starting ENA ascp", time=str(timezone.now()))
completed_command = subprocess.run(
formatted_command.split(),
Expand All @@ -111,8 +117,14 @@ def _download_file_aspera(
# NCBI requires encryption and recommends -k1 resume, as
# well as the 450m limit and -Q (play fair).
# ex: https://github.com/AlexsLemonade/refinebio/pull/1189#issuecomment-478018580
command_str = "ascp -p -Q -T -k1 -l 450m -i keys/asperaweb_id_dsa.openssh {src} {dest}"
formatted_command = command_str.format(src=download_url, dest=target_file_path)
command_str = (
"{ascp_path} -p -Q -T -k1 -l 450m -i keys/asperaweb_id_dsa.openssh {src} {dest}"
)
formatted_command = command_str.format(
ascp_path="/home/user/.aspera/ascli/sdk/ascp",
src=download_url,
dest=target_file_path,
)
logger.info("Starting NCBI ascp", time=str(timezone.now()))
completed_command = subprocess.run(
formatted_command.split(),
Expand All @@ -123,7 +135,6 @@ def _download_file_aspera(

# Something went wrong! Else, just fall through to returning True.
if completed_command.returncode != 0:

stdout = completed_command.stdout.decode().strip()
stderr = completed_command.stderr.decode().strip()
logger.debug(
Expand Down Expand Up @@ -181,15 +192,21 @@ def _download_file_aspera(
)
time.sleep(10)
return _download_file_aspera(
download_url, downloader_job, target_file_path, attempt + 1, original_file, source
download_url,
downloader_job,
target_file_path,
attempt + 1,
original_file,
source,
)
return True


def _has_unmated_reads(accession_code: str, downloader_job: DownloaderJob) -> bool:
"""Checks if the SRA accession has unmated reads.
Returns True if it does and False if it doesn't, and also whether or not it successfully connected to the ENA server"""
Returns True if it does and False if it doesn't, and also whether or not it successfully connected to the ENA server
"""
full_ftp_link = _build_ena_file_url(accession_code)

# Strip off the protocol code because we know it's FTP and the FTP
Expand Down
5 changes: 2 additions & 3 deletions workers/dockerfiles/Dockerfile.downloaders
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ RUN pip3 install --ignore-installed --no-cache-dir -r requirements.txt
RUN <<EOF
mkdir -m 700 ~/.gnupg/
echo "disable-ipv6" >> ~/.gnupg/dirmngr.conf
gpg --keyserver keyserver.ubuntu.com --recv-keys \
409B6B1796C275462A1703113804BB82D39DC0E3 7D2BAF1CF37B13E2069D6956105BD0E739499BDB
gpg --keyserver keyserver.ubuntu.com --recv-keys 409B6B1796C275462A1703113804BB82D39DC0E3 7D2BAF1CF37B13E2069D6956105BD0E739499BDB
curl -sSL https://get.rvm.io | bash -s stable --ruby --gems=aspera-cli
EOF

# Install ascp to /home/user/.aspera/ascli/sdk/ascp.
USER user
RUN <<EOF
. /usr/local/rvm/scripts/rvm
Expand All @@ -40,7 +40,6 @@ COPY workers/ .

RUN rm -rf /root/.cache/*

ENV PATH="$PATH:/home/user/.aspera/ascli/sdk"
ENV SYSTEM_VERSION=$SYSTEM_VERSION

USER user
Expand Down
3 changes: 1 addition & 2 deletions workers/dockerfiles/Dockerfile.salmon
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,7 @@ EOF

# Get the latest version from the dist directory.
COPY common/dist/data-refinery-common-* common/
RUN pip3 install --ignore-installed --no-cache-dir \
common/$(ls common -1 | sort --version-sort | tail -1)
RUN pip3 install --ignore-installed --no-cache-dir common/$(ls common -1 | sort --version-sort | tail -1)

COPY .boto .boto
COPY workers/ .
Expand Down

0 comments on commit 8001ba6

Please sign in to comment.