Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

assign-colors scripts are largely identical #286

Open
jameshadfield opened this issue Nov 3, 2024 · 0 comments
Open

assign-colors scripts are largely identical #286

jameshadfield opened this issue Nov 3, 2024 · 0 comments

Comments

@jameshadfield
Copy link
Member

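I suggest consolidating the two assign-colors scripts into a single script, since they are largely identical (see the diff below). While we're at it, we should remove the code around `forced_colors` (originally implemented for ncov), as that has always felt confusing.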

diff --git a/phylogenetic/scripts/assign-colors.py b/nextclade/scripts/assign-colors.py
index e7587f5..72f9dc6 100644
--- a/phylogenetic/scripts/assign-colors.py
+++ b/nextclade/scripts/assign-colors.py
@@ -1,21 +1,24 @@
 import argparse
-import pdb
+
 import pandas as pd
 
 # Forced colours MUST NOT appear in the ordering TSV
-forced_colors = {
-}
+forced_colors = {}
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     parser = argparse.ArgumentParser(
         description="Assign colors based on ordering",
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
     )
 
-    parser.add_argument('--ordering', type=str, required=True, help="input ordering file")
-    parser.add_argument('--color-schemes', type=str, required=True, help="input color schemes file")
-    parser.add_argument('--metadata', type=str, help="if provided, restrict colors to only those found in metadata")
-    parser.add_argument('--output', type=str, required=True, help="output colors tsv")
+    parser.add_argument("--ordering", type=str, required=True, help="input ordering file")
+    parser.add_argument("--color-schemes", type=str, required=True, help="input color schemes file")
+    parser.add_argument(
+        "--metadata",
+        type=str,
+        help="if provided, restrict colors to only those found in metadata",
+    )
+    parser.add_argument("--output", type=str, required=True, help="output colors tsv")
     args = parser.parse_args()
 
     assignment = {}
@@ -34,14 +37,18 @@ if __name__ == '__main__':
     # 1. remove assignments that don't exist in metadata
     # 2. remove assignments that have 'focal' set to 'False' in metadata
     if args.metadata:
-        metadata = pd.read_csv(args.metadata, delimiter='\t')
+        metadata = pd.read_csv(args.metadata, delimiter="\t")
         for name, trait in assignment.items():
             # Items not to exclude if not (yet) present in metadata to solve bootstrapping issue
-            if name in metadata and name not in ['clade_membership', 'outbreak', 'lineage']:
+            if name in metadata and name not in [
+                "clade_membership",
+                "outbreak",
+                "lineage",
+            ]:
                 subset_present = [x for x in assignment[name] if x in metadata[name].unique()]
                 assignment[name] = subset_present
-            if name in metadata and 'focal' in metadata:
-                focal_list = metadata.loc[metadata['focal'] == True, name].unique()
+            if name in metadata and "focal" in metadata:
+                focal_list = metadata.loc[metadata["focal"] == True, name].unique()
                 subset_focal = [x for x in assignment[name] if x in focal_list]
                 assignment[name] = subset_focal
 
@@ -53,28 +60,28 @@ if __name__ == '__main__':
             array = line.lstrip().rstrip().split("\t")
             schemes[counter] = array
 
-    with open(args.output, 'w') as f:
+    with open(args.output, "w") as f:
         for trait_name, trait_array in assignment.items():
-            if len(trait_array)==0:
+            if len(trait_array) == 0:
                 print(f"No traits found for {trait_name}")
                 continue
-            if len(schemes)<len(trait_array):
-              print(f"WARNING: insufficient colours available for trait {trait_name} - reusing colours!")
-              remain = len(trait_array)
-              color_array = []
-              while(remain>0):
-                if (remain>len(schemes)):
-                  color_array = [*color_array, *schemes[len(schemes)]]
-                  remain -= len(schemes)
-                else:
-                  color_array = [*color_array, *schemes[remain]]
-                  remain = 0
+            if len(schemes) < len(trait_array):
+                print(f"WARNING: insufficient colours available for trait {trait_name} - reusing colours!")
+                remain = len(trait_array)
+                color_array = []
+                while remain > 0:
+                    if remain > len(schemes):
+                        color_array = [*color_array, *schemes[len(schemes)]]
+                        remain -= len(schemes)
+                    else:
+                        color_array = [*color_array, *schemes[remain]]
+                        remain = 0
             else:
-              color_array = schemes[len(trait_array)]
+                color_array = schemes[len(trait_array)]
             extra_trait_values = list(forced_colors.get(trait_name, {}).keys())
             extra_color_values = list(forced_colors.get(trait_name, {}).values())
 
-            zipped = list(zip(trait_array+extra_trait_values, color_array+extra_color_values))
+            zipped = list(zip(trait_array + extra_trait_values, color_array + extra_color_values))
             for trait_value, color in zipped:
                 f.write(trait_name + "\t" + trait_value + "\t" + color + "\n")
             f.write("\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant