84 changes: 50 additions & 34 deletions Multiplex_Analysis_Web_Apps.py
@@ -3,6 +3,7 @@
'''
import os
import re
import logging
import subprocess
import numpy as np

@@ -12,41 +13,45 @@
import nidap_dashboard_lib as ndl # Useful functions for dashboards connected to NIDAP
import streamlit_utils
import platform_io
import install_missing_packages

install_missing_packages.live_package_installation()

# Note: if any of the following imports marked " # slow" are not commented out, there is a delay in running the forking test
from pages2 import data_import_and_export
from pages2 import datafile_format_unifier
from pages2 import open_file
from pages2 import feature_creation
from pages2 import robust_scatter_plotter
from pages2 import multiaxial_gating
from pages2 import thresholded_phenotyping # slow due to things ultimately importing umap
from pages2 import adaptive_phenotyping
from pages2 import Pheno_Cluster_a # "slow" for forking test initialization
from pages2 import Pheno_Cluster_b # "slow" for forking test initialization
from pages2 import Tool_parameter_selection
from pages2 import Run_workflow
from pages2 import Display_individual_ROI_heatmaps
from pages2 import Display_average_heatmaps
from pages2 import Display_average_heatmaps_per_annotation
from pages2 import Display_ROI_P_values_overlaid_on_slides
from pages2 import Neighborhood_Profiles # slow due to things ultimately importing umap
from pages2 import UMAP_Analyzer # slow due to things ultimately importing umap
from pages2 import Clusters_Analyzer # slow due to things ultimately importing umap
from pages2 import memory_analyzer
from pages2 import radial_bins_plots
from pages2 import radial_profiles_analysis
from pages2 import preprocessing
from pages2 import results_transfer
# from pages2 import forking_test

from pages import data_import_and_export
from pages import datafile_format_unifier
from pages import open_file
from pages import feature_creation
from pages import robust_scatter_plotter
from pages import multiaxial_gating
from pages import thresholded_phenotyping # slow due to things ultimately importing umap
from pages import adaptive_phenotyping
from pages import Pheno_Cluster_a # "slow" for forking test initialization
from pages import Pheno_Cluster_b # "slow" for forking test initialization
from pages import Tool_parameter_selection
from pages import Run_workflow
from pages import Display_individual_ROI_heatmaps
from pages import Display_average_heatmaps
from pages import Display_average_heatmaps_per_annotation
from pages import Display_ROI_P_values_overlaid_on_slides
from pages import Neighborhood_Profiles # slow due to things ultimately importing umap
from pages import UMAP_Analyzer # slow due to things ultimately importing umap
from pages import Clusters_Analyzer # slow due to things ultimately importing umap
from pages import memory_analyzer
from pages import radial_bins_plots
from pages import radial_profiles_analysis
from pages import preprocessing
from pages import results_transfer
# from pages import forking_test

# Configure logging
logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

def welcome_page():
'''
First page displayed when the app opens
First page displayed when the app opens.

This requires some extra work to make the markdown rendering
work properly.
'''
# Markdown text
with open("markdown/MAWA_WelcomePage.md", "r", encoding="utf-8") as f:
@@ -73,13 +78,20 @@ def check_for_platform(session_state):
'''
Set the platform parameters based on the platform the Streamlit app is running on
'''

# Initialize the platform object
if 'platform' not in session_state:
logger.info('Platform initialization starting.')

session_state['platform'] = platform_io.Platform(platform=('nidap' if platform_is_nidap() else 'local'))
logger.info('Platform initialization complete.')
return session_state


def main():
'''
Main function for running the Multiplex Analysis Web Apps
'''

st.set_page_config(layout="wide")

@@ -143,9 +155,11 @@ def main():
# Ensure the input/output directories exist
input_path = './input'
if not os.path.exists(input_path):
logger.info("Creating input directory at %s", input_path)
os.makedirs(input_path)
output_path = './output'
if not os.path.exists(output_path):
logger.info("Creating output directory at %s", output_path)
os.makedirs(output_path)

# For widget persistence, we must always copy the session state to itself, being careful with widgets that cannot be persisted, like st.data_editor() (where we use the "__do_not_persist" suffix to avoid persisting it)
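The loop that implements this is not shown in this diff; a minimal sketch of the usual Streamlit copy-to-self pattern, assuming the "__do_not_persist" suffix convention described above, would be:

import streamlit as st

# Re-assign each persistable key to itself so widget state survives page
# switches; keys ending in "__do_not_persist" (e.g., st.data_editor state)
# are skipped because Streamlit cannot persist them this way.
for key in st.session_state.keys():
    if not key.endswith('__do_not_persist'):
        st.session_state[key] = st.session_state[key]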
@@ -173,16 +187,18 @@ def main():

# Initialize session_state values for streamlit processing
if 'init' not in st.session_state:
logger.info("Initializing session state")
st.session_state = ndl.init_session_state(st.session_state)

# Sidebar organization
with st.sidebar:
st.write('**:book: [Documentation](https://ncats.github.io/multiplex-analysis-web-apps/)**')
st.write('**📖 [Documentation](https://ncats.github.io/multiplex-analysis-web-apps/)**')
with st.expander('Advanced:'):
benchmark_button = True
if benchmark_button:
st.button('Record Benchmarking', on_click = st.session_state.bc.save_run_to_csv)
if st.button('Record Benchmarking'):
logger.info("Recording benchmark information")
st.session_state.bc.save_run_to_csv()
if st.button('Calculate memory used by Python session'):
logger.info("Calculating memory used by Python session")
streamlit_utils.write_python_session_memory_usage()

# Check the platform
93 changes: 44 additions & 49 deletions basic_phenotyper_lib.py
@@ -100,26 +100,32 @@ def init_pheno_cols(df, marker_names, marker_col_prefix):
df_markers = df[marker_cols]

df_markers = df_markers.map(lambda x: {'+': '1', '-': '0'}[x[-1]])
df['mark_bits'] = df_markers.astype(str).apply(''.join, axis='columns') # efficiently create a series of strings that are the columns (in string format) concatenated together

# Vectorized creation of 'mark_bits'
df['mark_bits'] = df_markers.astype(str).agg(''.join, axis=1)

# Add a column of prettier names for the species, e.g., 'VIM- ECAD+ COX2+ NOS2-'
df['species_name_long'] = df['mark_bits'].apply(lambda mark_bits: ' '.join([marker_name + ('+' if marker_bit == '1' else '-') for marker_name, marker_bit in zip(marker_names, mark_bits)]))

# Add a column dropping the negative markers from these pretty names, e.g., 'ECAD+ COX2+'
def species_name_long_to_short(species_name_long):
x = '+ '.join([marker_names[iy] for iy, y in enumerate([x for x in species_name_long if x in ('+', '-')]) if y == '+']) + '+'
species_name_short = x if len(x) != 1 else 'Other'
return species_name_short
# This can possibly be made faster (if it's correct) via but I haven't tested it:
# marker_indices = [i for i, x in enumerate(species_name_long) if x == '+']
# if not marker_indices:
# return 'Other'
# return ' + '.join(marker_names[i] for i in marker_indices) + '+'
df['species_name_short'] = df['species_name_long'].apply(species_name_long_to_short)

# Create a new column called 'has pos mark' identifying which species_name_shorts are not Other
df['has_pos_mark'] = True
df.loc[df['species_name_short'] == 'Other', 'has_pos_mark'] = False
df['species_name_long'] = df['mark_bits'].apply(
lambda mark_bits: ' '.join(
[f"{marker_name}{'+' if bit == '1' else '-'}" for marker_name, bit in zip(marker_names, mark_bits)]
)
)
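On toy data (marker and column names hypothetical), the two vectorized steps above behave as follows:

import pandas as pd

marker_names = ['VIM', 'ECAD']
df_markers = pd.DataFrame({'VIM': ['1', '0'], 'ECAD': ['1', '1']})

mark_bits = df_markers.astype(str).agg(''.join, axis=1)  # ['11', '01']
species_name_long = mark_bits.apply(
    lambda bits: ' '.join(f"{name}{'+' if bit == '1' else '-'}"
                          for name, bit in zip(marker_names, bits))
)  # ['VIM+ ECAD+', 'VIM- ECAD+']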

# # Add a column dropping the negative markers from these pretty names, e.g., 'ECAD+ COX2+'
# def species_name_long_to_short(species_name_long):
# x = '+ '.join([marker_names[iy] for iy, y in enumerate([x for x in species_name_long if x in ('+', '-')]) if y == '+']) + '+'
# species_name_short = x if len(x) != 1 else 'Other'
# return species_name_short
# # This can possibly be made faster (if it's correct) via the following, but I haven't tested it:
# # marker_indices = [i for i, x in enumerate(species_name_long) if x == '+']
# # if not marker_indices:
# # return 'Other'
# # return ' + '.join(marker_names[i] for i in marker_indices) + '+'
# df['species_name_short'] = df['species_name_long'].apply(species_name_long_to_short)
df['species_name_short'] = df['species_name_long'].str.extractall(r'(\w+)\+').groupby(level=0).agg(' + '.join).fillna('Other') + '+'

Copilot AI Oct 23, 2025


The new vectorized implementation for 'species_name_short' changes the output format compared to the commented-out function. The original function returns 'Other' for cases with no positive markers, but the new implementation returns 'Other+'. Additionally, cells with positive markers will get an extra '+' at the end. For example, 'ECAD+ COX2+' in the old version will become 'ECAD + COX2++' in the new version. Consider using: df['species_name_short'] = df['species_name_long'].str.extractall(r'(\w+)\+').groupby(level=0).agg(' + '.join).fillna('Other').apply(lambda x: x if x == 'Other' else x + '+')

Suggested change
df['species_name_short'] = df['species_name_long'].str.extractall(r'(\w+)\+').groupby(level=0).agg(' + '.join).fillna('Other') + '+'
df['species_name_short'] = df['species_name_long'].str.extractall(r'(\w+)\+').groupby(level=0).agg(' + '.join).fillna('Other').apply(lambda x: x if x == 'Other' else x + '+')

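The behavior Copilot flags is easy to reproduce on a toy series (values hypothetical); note that extractall also drops rows with no '+' match entirely, so an all-negative row only becomes 'Other' if the result is aligned back to the original index first:

import pandas as pd

s = pd.Series(['VIM+ ECAD+ COX2-', 'VIM- ECAD- COX2-'])
joined = s.str.extractall(r'(\w+)\+').groupby(level=0).agg(' + '.join)[0]
# joined has only index 0 ('VIM + ECAD'); index 1 is absent because
# extractall drops rows with no matches, so fillna never sees it.
short = joined.reindex(s.index).fillna('Other')
short = short.where(short == 'Other', short + '+')
# short -> ['VIM + ECAD+', 'Other']; the ' + ' join also differs from the
# original 'VIM+ ECAD+' style, as the comment above points out.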

# Create a new column called 'has pos mark' identifying which species_name_shorts are not "Other"
df['has_pos_mark'] = df['species_name_short'] != 'Other'

# Create phenotype column and assign a value of 'unassigned'
df['phenotype'] = 'unassigned'
@@ -143,36 +149,15 @@ def init_pheno_assign(df):
of each "exclusive" species
'''

st_init_species = time.time()
spec_summ = df[['species_name_short', 'phenotype', 'species_name_long']]
sp_init_species = time.time()
elapsed = round(sp_init_species - st_init_species, 3)
print(f' Initalizing Phenotying Assignments: {elapsed}s')

# This line seems to throw a TypeError: unhashable type: 'numpy.ndarray' error
spec_summ['species_count'] = spec_summ['species_name_short'].groupby(spec_summ['species_name_short']).transform('count')
spec_summ = spec_summ.drop_duplicates().reset_index(drop=True)
spec_summ = df[['species_name_short', 'phenotype', 'species_name_long']].copy()

# The above seems a bit inefficient and should probably be replaced with something like this:
# spec_summ = spec_summ['species_name_short'].value_counts().reset_index()
# spec_summ.columns = ['species_name_short', 'species_count']
species_counts = spec_summ['species_name_short'].value_counts().reset_index()
species_counts.columns = ['species_name_short', 'species_count']

sp_species_count = time.time()
elapsed_counts = round(sp_species_count - sp_init_species, 3)
print(f' Phenotying Assignments Counts Calculations: {elapsed_counts}s')
spec_summ = spec_summ.drop_duplicates(subset=['species_name_short']).merge(species_counts, on='species_name_short')
spec_summ['species_percent'] = (spec_summ['species_count'] / spec_summ['species_count'].sum() * 100).round(2)

spec_summ['species_percent'] = [round(100*x/sum(spec_summ['species_count']), 2) for x in spec_summ['species_count']]
sp_species_per = time.time()
elapsed_per = round(sp_species_per - sp_species_count, 3)
print(f' Phenotying Assignments Percents Calculations: {elapsed_per}s')

spec_summ = spec_summ.sort_values(by='species_count', ascending= False).reset_index(drop=True)
sp_species_sort = time.time()
elapsed_sort = round(sp_species_sort - sp_species_per, 3)
print(f' Phenotying Assignments sorting: {elapsed_sort}s')

# Return the created dataframe
return spec_summ
return spec_summ.sort_values(by='species_count', ascending=False).reset_index(drop=True)
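A small worked example of the value_counts/merge sequence above (species names hypothetical):

import pandas as pd

spec_summ = pd.DataFrame({
    'species_name_short': ['ECAD+', 'ECAD+', 'Other'],
    'phenotype': ['unassigned'] * 3,
    'species_name_long': ['VIM- ECAD+', 'VIM- ECAD+', 'VIM- ECAD-'],
})
species_counts = spec_summ['species_name_short'].value_counts().reset_index()
species_counts.columns = ['species_name_short', 'species_count']
out = (spec_summ.drop_duplicates(subset=['species_name_short'])
                .merge(species_counts, on='species_name_short'))
out['species_percent'] = (out['species_count'] / out['species_count'].sum() * 100).round(2)
# out -> 'ECAD+': count 2 (66.67%), 'Other': count 1 (33.33%)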

def init_pheno_summ(df):
'''For each unique species (elsewhere called "exclusive" phenotyping),
@@ -187,11 +172,21 @@ def init_pheno_summ(df):
each "exclusive" species
'''

assign_pheno = df[['phenotype', 'species_name_short', 'species_name_long']].groupby(by='phenotype', as_index = False).agg(lambda x: np.unique(list(x)))
# Group by phenotype and aggregate unique values for species_name_short and species_name_long
assign_pheno = df.groupby('phenotype', as_index=False).agg({
'species_name_short': lambda x: ', '.join(str(val) for val in pd.unique(x.dropna())),
'species_name_long': lambda x: ', '.join(str(val) for val in pd.unique(x.dropna()))
})

# Calculate phenotype counts and percentages
phenotype_counts = df['phenotype'].value_counts()
total_count = phenotype_counts.sum()

assign_pheno['phenotype_count'] = assign_pheno['phenotype'].map(phenotype_counts)
assign_pheno['phenotype_percent'] = (assign_pheno['phenotype_count'] / total_count * 100).round(2)

assign_pheno['phenotype_count'] = [sum(df['phenotype'] == x) for x in assign_pheno.phenotype]
assign_pheno['phenotype_percent'] = [round(100*x/sum(assign_pheno['phenotype_count']), 2) for x in assign_pheno['phenotype_count']]
assign_pheno = assign_pheno.sort_values(by='phenotype_count', ascending=False)
# Sort by phenotype count in descending order
assign_pheno = assign_pheno.sort_values(by='phenotype_count', ascending=False).reset_index(drop=True)

return assign_pheno

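A corresponding toy run of the groupby aggregation above (phenotype labels hypothetical):

import pandas as pd

df = pd.DataFrame({
    'phenotype': ['Tumor', 'Tumor', 'unassigned'],
    'species_name_short': ['ECAD+', 'ECAD+ COX2+', 'Other'],
    'species_name_long': ['ECAD+ COX2-', 'ECAD+ COX2+', 'ECAD- COX2-'],
})
assign_pheno = df.groupby('phenotype', as_index=False).agg({
    'species_name_short': lambda x: ', '.join(str(v) for v in pd.unique(x.dropna())),
    'species_name_long': lambda x: ', '.join(str(v) for v in pd.unique(x.dropna())),
})
phenotype_counts = df['phenotype'].value_counts()
assign_pheno['phenotype_count'] = assign_pheno['phenotype'].map(phenotype_counts)
assign_pheno['phenotype_percent'] = (assign_pheno['phenotype_count']
                                     / phenotype_counts.sum() * 100).round(2)
# 'Tumor' row -> 'ECAD+, ECAD+ COX2+', count 2, 66.67%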
18 changes: 13 additions & 5 deletions install_missing_packages.py
@@ -15,7 +15,9 @@ def is_mamba_installed():
'''
try:
# Run the 'mamba --version' command
result = subprocess.run(['mamba', '--version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
result = subprocess.run(['mamba', '--version'],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE, text=True, check=False)

# Check if the command was successful
if result.returncode == 0:
@@ -36,7 +38,9 @@ def install_with_mamba(packages):
print(f"&&&& Attempting to install {', '.join(packages)} with mamba.")
try:
# Run the 'mamba install <packages>' command
result = subprocess.run(['mamba', 'install', '-y'] + packages, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
result = subprocess.run(['mamba', 'install', '-y'] + packages,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE, text=True, check=False)

# Check if the command was successful
if result.returncode == 0:
@@ -56,7 +60,9 @@ def install_with_conda(packages):
print(f"&&&& Attempting to install {', '.join(packages)} with conda.")
try:
# Run the 'conda install <packages>' command
result = subprocess.run(['conda', 'install', '-y'] + packages, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
result = subprocess.run(['conda', 'install', '-y'] + packages,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE, text=True, check=False)

# Check if the command was successful
if result.returncode == 0:
@@ -75,7 +81,9 @@ def install_with_pip(packages):
print(f"&&&& Attempting to install {', '.join(packages)} with pip.")
try:
# Run the 'pip install <packages>' command
result = subprocess.run(['pip', 'install'] + packages, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
result = subprocess.run(['pip', 'install'] + packages,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE, text=True, check=False)

# Check if the command was successful
if result.returncode == 0:
@@ -91,7 +99,7 @@ def live_package_installation():
'''
Function to check for missing packages and install them
'''

# last two probably only needed for published dashboards
packages_to_install = ['hnswlib', 'parc', 'sklearn_ann', 'annoy', 'pyNNDescent']
installers_to_use = ['mamba', 'pip']
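The loop that consumes these lists is not shown in this diff; a plausible sketch of the fallback chain, assuming (unverified from the visible hunks) that the install_with_* helpers return True on success, would be:

# Hypothetical fallback: try each installer in order until one succeeds.
for installer in installers_to_use:
    if installer == 'mamba' and is_mamba_installed():
        if install_with_mamba(packages_to_install):
            break
    elif installer == 'pip':
        if install_with_pip(packages_to_install):
            break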
5 changes: 1 addition & 4 deletions nidap_dashboard_lib.py
@@ -14,12 +14,10 @@
alt.data_transformers.disable_max_rows()
from natsort import natsorted
from pathlib import Path
from datetime import datetime
import basic_phenotyper_lib as bpl # Useful functions for cell phenotyping
from foundry_IO_lib import foundry_IO_lib # Foundry Input/Output Class
from benchmark_collector import benchmark_collector # Benchmark Collector Class
from neighborhood_profiles import NeighborhoodProfiles, UMAPDensityProcessing # slow because this imports umap
import PlottingTools as umPT

def identify_col_type(col):
'''
@@ -327,9 +325,8 @@ def set_phenotyping_elements(session_state, df_orig):
if hasattr(session_state, 'dataeditor__do_not_persist'):
delattr(session_state, 'dataeditor__do_not_persist')

# Initalize Phenotyping Settings (Radio BUttons)
# Initialize Phenotyping Settings (Radio Buttons)
session_state.noPhenoOpt = 'Not Selected'
session_state.phenoMeth = 'Species' # Default when first loaded
session_state.selected_phenoMeth = session_state.noPhenoOpt # Default when first loaded

return session_state