adding studies without samples

wtsi-hgi · Nov 30, 2021 · d3683a6 · d3683a6
1 parent 6b9a91e
commit d3683a6
Show file tree

Hide file tree

Showing 4 changed files with 86 additions and 78 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -50,7 +50,7 @@ WORKDIR /app
 COPY requirements.txt ./
 
 RUN pip3 install -r requirements.txt
-RUN pip3 install git+https://gitlab.internal.sanger.ac.uk/hgi-projects/uploadtogenestack@2.7#egg=uploadtogenestack
+RUN pip3 install git+https://gitlab.internal.sanger.ac.uk/hgi-projects/uploadtogenestack@2.8#egg=uploadtogenestack
 
 # Copying Python scripts, and built frontend
 COPY . .

diff --git a/api.py b/api.py
@@ -24,6 +24,8 @@
 import csv
 import importlib.metadata
 from json.decoder import JSONDecodeError
+import os
+from pathlib import Path
 import time
 import typing as T
 
@@ -42,13 +44,16 @@
 # we define in the config which genestack server we're using
 gs_config = uploadtogenestack.GenestackStudy.get_gs_config(
     config.GENESTACK_SERVER)
+ssh_key_path = f"{os.environ['HOME']}/.ssh/id_rsa_genestack"
 
 # as we need to pull files from the S3 bucket, we need a connection to the bucket
 # to start of with. this can throw an exception right at the start of the program
 # if we can't access the bucket, so a public policy will need setting so we
 # can start the app
 try:
-    s3_bucket = uploadtogenestack.S3BucketUtils(gs_config["genestackbucket"])
+    s3_bucket = uploadtogenestack.S3BucketUtils(
+        gs_config["genestackbucket"],
+        ssh_key_filepath=ssh_key_path)
 except (
     botocore.exceptions.ClientError,
     uploadtogenestack.genestackassist.BucketPermissionDenied,
@@ -114,83 +119,84 @@ def all_studies() -> Response:
             if "Study Title" not in body or body["Study Title"] == "":
                 body["Study Title"] = body["Study Source"]
 
-            # We need to download the sample file from the S3 bucket and
-            # store it locally so it can get uploaded.
-            # Once it has been uploaded, we don't care about it anymore,
-            # so we'll just store it in /tmp
-            sample_file = f"/tmp/samples_{int(time.time()*1000)}.tsv"
+            sample_file: T.Optional[Path] = None
 
             with s3.S3PublicPolicy(s3_bucket):
-
-                # Getting Data from S3
-                s3_bucket.download_file(body["Sample File"], sample_file)
-
-                # Although Sample File is passed to us in our API,
+                if body.get("Sample File"):
+                    # We need to download the sample file from the S3 bucket and
+                    # store it locally so it can get uploaded.
+                    # Once it has been uploaded, we don't care about it anymore,
+                    # so we'll just store it in /tmp
+                    sample_file = f"/tmp/samples_{int(time.time()*1000)}.tsv"
+
+                    # Getting Data from S3
+                    s3_bucket.download_file(body["Sample File"], sample_file)
+
+                    # Reanming Sample File Columns
+
+                    # The user has the oppurtunity to rename columns in the sample file,
+                    # or create new columns in the sample file before it gets uploaded.
+                    # We get passed {old: ..., new: ..., colValue: ...} objects giving us
+                    # the column name to rename, the new column name and the value that should
+                    # be in the column. Leaving colValue blank shows we want to use the values
+                    # that are already in that column (which can be different in every row).
+                    # Filling in colValue means we want the same value in each cell, which
+                    # can be used when making new columns. This involves leaving `old` as
+                    # an empty string
+
+                    # The other important thing is that is a column isn't included, it'll be
+                    # deleted. We don't give the user the option to delete columns, so we need
+                    # to ensure that ALL current columns are also included. For this, we read
+                    # the headers of the sample file, and add those records in, keeping old and
+                    # new the same, and leaving colValue blank, so it uses the already existing
+                    # values.
+
+                    # Then we open a file to write this all to, how the uploadtogenestack package
+                    # expects it to be. This is a `|` separated file, with a header row:
+                    # old|new|fillvalue
+                    # where fillvalue is what we've called colValue up to now
+                    # Then we can pass the samples file, this new temp file to the package, and
+                    # get back the path of a new samples file, which we'll use later on.
+
+                    # all this is under the assumption that we're going to rename anything,
+                    # hence `if len(body["renamedColumns"]) != 0:`
+                    if len(body["renamedColumns"]) != 0:
+                        with open(sample_file, encoding="UTF-8") as samples:
+                            reader = csv.reader(samples, delimiter="\t")
+                            headers = next(reader)
+
+                        for header in headers:
+                            if header not in [x["old"] for x in body["renamedColumns"]]:
+                                body["renamedColumns"].append({
+                                    "old": header,
+                                    "new": header,
+                                    "colValue": ""
+                                })
+
+                        tmp_rename_fp: str = f"/tmp/gs-rename-{int(time.time()*1000)}.tsv"
+                        with open(tmp_rename_fp, "w", encoding="UTF-8") as tmp_rename:
+                            tmp_rename.write("old|new|fillvalue\n")
+                            for col_rename in body["renamedColumns"]:
+                                tmp_rename.write(
+                                    "|".join([
+                                        col_rename["old"],
+                                        col_rename["new"],
+                                        col_rename["colValue"]
+                                        if col_rename["colValue"] != ""
+                                        else "[fillvalue]"
+                                    ]) + "\n")
+
+                        if uploadtogenestack.GenestackUploadUtils.check_suggested_columns(
+                            tmp_rename_fp,
+                            sample_file
+                        ):
+                            sample_file = uploadtogenestack.GenestackUploadUtils. \
+                                renamesamplefilecolumns(sample_file, tmp_rename_fp)
+
+                # Although these are passed to us in our API,
                 # it would be invalid in what we pass to genestack, so we
                 # need rid of it now we've downloaded the file from S3
                 del body["Sample File"]
-
-                # Reanming Sample File Columns
-
-                # The user has the oppurtunity to rename columns in the sample file,
-                # or create new columns in the sample file before it gets uploaded.
-                # We get passed {old: ..., new: ..., colValue: ...} objects giving us
-                # the column name to rename, the new column name and the value that should
-                # be in the column. Leaving colValue blank shows we want to use the values
-                # that are already in that column (which can be different in every row).
-                # Filling in colValue means we want the same value in each cell, which
-                # can be used when making new columns. This involves leaving `old` as
-                # an empty string
-
-                # The other important thing is that is a column isn't included, it'll be
-                # deleted. We don't give the user the option to delete columns, so we need
-                # to ensure that ALL current columns are also included. For this, we read
-                # the headers of the sample file, and add those records in, keeping old and
-                # new the same, and leaving colValue blank, so it uses the already existing
-                # values.
-
-                # Then we open a file to write this all to, how the uploadtogenestack package
-                # expects it to be. This is a `|` separated file, with a header row:
-                # old|new|fillvalue
-                # where fillvalue is what we've called colValue up to now
-                # Then we can pass the samples file, this new temp file to the package, and
-                # get back the path of a new samples file, which we'll use later on.
-
-                # all this is under the assumption that we're going to rename anything,
-                # hence `if len(body["renamedColumns"]) != 0:`
-                if len(body["renamedColumns"]) != 0:
-                    with open(sample_file, encoding="UTF-8") as samples:
-                        reader = csv.reader(samples, delimiter="\t")
-                        headers = next(reader)
-
-                    for header in headers:
-                        if header not in [x["old"] for x in body["renamedColumns"]]:
-                            body["renamedColumns"].append({
-                                "old": header,
-                                "new": header,
-                                "colValue": ""
-                            })
-
-                    tmp_rename_fp: str = f"/tmp/gs-rename-{int(time.time()*1000)}.tsv"
-                    with open(tmp_rename_fp, "w", encoding="UTF-8") as tmp_rename:
-                        tmp_rename.write("old|new|fillvalue\n")
-                        for col_rename in body["renamedColumns"]:
-                            tmp_rename.write(
-                                "|".join([
-                                    col_rename["old"],
-                                    col_rename["new"],
-                                    col_rename["colValue"]
-                                    if col_rename["colValue"] != ""
-                                    else "[fillvalue]"
-                                ]) + "\n")
-
-                    if uploadtogenestack.GenestackUploadUtils.check_suggested_columns(
-                        tmp_rename_fp,
-                        sample_file
-                    ):
-                        sample_file = uploadtogenestack.GenestackUploadUtils. \
-                            renamesamplefilecolumns(sample_file, tmp_rename_fp)
-
                 del body["renamedColumns"]
 
                 # Creating Metadata TSV
@@ -210,7 +216,8 @@ def all_studies() -> Response:
                     samplefile=sample_file,
                     genestackserver=config.GENESTACK_SERVER,
                     genestacktoken=token,
-                    studymetadata=tmp_fp
+                    studymetadata=tmp_fp,
+                    ssh_key_filepath=ssh_key_path
                 )
 
             return create_response({"accession": study.study_accession}, 201)
@@ -352,7 +359,8 @@ def all_signals(study_id: str) -> Response:
                     study_genestackaccession=study_id,
                     genestackserver=config.GENESTACK_SERVER,
                     genestacktoken=token,
-                    signal_dict=body
+                    signal_dict=body,
+                    ssh_key_filepath=ssh_key_path
                 )
 
             return CREATED

diff --git a/config.py b/config.py
@@ -24,7 +24,7 @@
 
 # Configuration Settings
 
-VERSION = "1.0"
+VERSION = "1.0.1"
 
 # this is the end of the base url, identical to frontend/.env
 # this only affects the swagger ui

diff --git a/frontend/pages/studies/index.jsx b/frontend/pages/studies/index.jsx
@@ -73,7 +73,7 @@ const NewStudy = () => {
       var fields = t.data.template
         .filter((e) => !e.isReadOnly && e.dataType == "study")
         .map((e) => ({ name: e.name, required: e.isRequired }));
-      fields.unshift({ name: "Sample File", required: true });
+      fields.unshift({ name: "Sample File", required: false });
       setNewStudy(
         fields.reduce((xs, x) => ({ ...xs, [x.name]: "" }), {
           renamedColumns: [],