84 changes: 50 additions & 34 deletions Multiplex_Analysis_Web_Apps.py
@@ -3,6 +3,7 @@
'''
import os
import re
import logging
import subprocess
import numpy as np

@@ -12,41 +13,45 @@
import nidap_dashboard_lib as ndl # Useful functions for dashboards connected to NIDAP
import streamlit_utils
import platform_io
import install_missing_packages

install_missing_packages.live_package_installation()

# Note: if any of the following imports marked " # slow" are not commented out, there is a delay in running the forking test
from pages2 import data_import_and_export
from pages2 import datafile_format_unifier
from pages2 import open_file
from pages2 import feature_creation
from pages2 import robust_scatter_plotter
from pages2 import multiaxial_gating
from pages2 import thresholded_phenotyping # slow due to things ultimately importing umap
from pages2 import adaptive_phenotyping
from pages2 import Pheno_Cluster_a # "slow" for forking test initialization
from pages2 import Pheno_Cluster_b # "slow" for forking test initialization
from pages2 import Tool_parameter_selection
from pages2 import Run_workflow
from pages2 import Display_individual_ROI_heatmaps
from pages2 import Display_average_heatmaps
from pages2 import Display_average_heatmaps_per_annotation
from pages2 import Display_ROI_P_values_overlaid_on_slides
from pages2 import Neighborhood_Profiles # slow due to things ultimately importing umap
from pages2 import UMAP_Analyzer # slow due to things ultimately importing umap
from pages2 import Clusters_Analyzer # slow due to things ultimately importing umap
from pages2 import memory_analyzer
from pages2 import radial_bins_plots
from pages2 import radial_profiles_analysis
from pages2 import preprocessing
from pages2 import results_transfer
# from pages2 import forking_test

from pages import data_import_and_export
from pages import datafile_format_unifier
from pages import open_file
from pages import feature_creation
from pages import robust_scatter_plotter
from pages import multiaxial_gating
from pages import thresholded_phenotyping # slow due to things ultimately importing umap
from pages import adaptive_phenotyping
from pages import Pheno_Cluster_a # "slow" for forking test initialization
from pages import Pheno_Cluster_b # "slow" for forking test initialization
from pages import Tool_parameter_selection
from pages import Run_workflow
from pages import Display_individual_ROI_heatmaps
from pages import Display_average_heatmaps
from pages import Display_average_heatmaps_per_annotation
from pages import Display_ROI_P_values_overlaid_on_slides
from pages import Neighborhood_Profiles # slow due to things ultimately importing umap
from pages import UMAP_Analyzer # slow due to things ultimately importing umap
from pages import Clusters_Analyzer # slow due to things ultimately importing umap
from pages import memory_analyzer
from pages import radial_bins_plots
from pages import radial_profiles_analysis
from pages import preprocessing
from pages import results_transfer
# from pages import forking_test

# Configure logging
logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

def welcome_page():
'''
First page displayed when the app opens
First page displayed when the app opens.

This requires some extra work to make the markdown rendering
work properly.
'''
# Markdown text
with open("markdown/MAWA_WelcomePage.md", "r", encoding="utf-8") as f:
@@ -73,13 +78,20 @@ def check_for_platform(session_state):
'''
Set the platform parameters based on the platform the Streamlit app is running on
'''

# Initialize the platform object
if 'platform' not in session_state:
logger.info('Platform initialization starting.')

session_state['platform'] = platform_io.Platform(platform=('nidap' if platform_is_nidap() else 'local'))
logger.info('Platform initialization complete.')
return session_state


def main():
'''
Main function for running the Multiplex Analysis Web Apps
'''

st.set_page_config(layout="wide")

@@ -143,9 +155,11 @@ def main():
# Ensure the input/output directories exist
input_path = './input'
if not os.path.exists(input_path):
logger.info("Creating input directory at %s", input_path)
os.makedirs(input_path)
output_path = './output'
if not os.path.exists(output_path):
logger.info("Creating output directory at %s", output_path)
os.makedirs(output_path)

# For widget persistence, we must always copy the session state to itself, being careful with widgets that cannot be persisted, like st.data_editor() (where we use the "__do_not_persist" suffix to avoid persisting it)
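The loop that implements this is not shown in this diff; a minimal sketch of the usual Streamlit copy-to-self pattern, assuming the "__do_not_persist" suffix convention described above, would be:

import streamlit as st

# Re-assign each persistable key to itself so widget state survives page
# switches; keys ending in "__do_not_persist" (e.g., st.data_editor state)
# are skipped because Streamlit cannot persist them this way.
for key in st.session_state.keys():
    if not key.endswith('__do_not_persist'):
        st.session_state[key] = st.session_state[key]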
@@ -173,16 +187,18 @@ def main():

# Initialize session_state values for streamlit processing
if 'init' not in st.session_state:
logger.info("Initializing session state")
st.session_state = ndl.init_session_state(st.session_state)

# Sidebar organization
with st.sidebar:
st.write('**:book: [Documentation](https://ncats.github.io/multiplex-analysis-web-apps/)**')
st.write('**📖 [Documentation](https://ncats.github.io/multiplex-analysis-web-apps/)**')
with st.expander('Advanced:'):
benchmark_button = True
if benchmark_button:
st.button('Record Benchmarking', on_click = st.session_state.bc.save_run_to_csv)
if st.button('Record Benchmarking'):
logger.info("Recording benchmark information")
st.session_state.bc.save_run_to_csv()
if st.button('Calculate memory used by Python session'):
logger.info("Calculating memory used by Python session")
streamlit_utils.write_python_session_memory_usage()

# Check the platform
93 changes: 44 additions & 49 deletions basic_phenotyper_lib.py
@@ -100,26 +100,32 @@ def init_pheno_cols(df, marker_names, marker_col_prefix):
df_markers = df[marker_cols]

df_markers = df_markers.map(lambda x: {'+': '1', '-': '0'}[x[-1]])
df['mark_bits'] = df_markers.astype(str).apply(''.join, axis='columns') # efficiently create a series of strings that are the columns (in string format) concatenated together

# Vectorized creation of 'mark_bits'
df['mark_bits'] = df_markers.astype(str).agg(''.join, axis=1)

# Add a column of prettier names for the species, e.g., 'VIM- ECAD+ COX2+ NOS2-'
df['species_name_long'] = df['mark_bits'].apply(lambda mark_bits: ' '.join([marker_name + ('+' if marker_bit == '1' else '-') for marker_name, marker_bit in zip(marker_names, mark_bits)]))

# Add a column dropping the negative markers from these pretty names, e.g., 'ECAD+ COX2+'
def species_name_long_to_short(species_name_long):
x = '+ '.join([marker_names[iy] for iy, y in enumerate([x for x in species_name_long if x in ('+', '-')]) if y == '+']) + '+'
species_name_short = x if len(x) != 1 else 'Other'
return species_name_short
# This can possibly be made faster (if it's correct) via but I haven't tested it:
# marker_indices = [i for i, x in enumerate(species_name_long) if x == '+']
# if not marker_indices:
# return 'Other'
# return ' + '.join(marker_names[i] for i in marker_indices) + '+'
df['species_name_short'] = df['species_name_long'].apply(species_name_long_to_short)

# Create a new column called 'has pos mark' identifying which species_name_shorts are not Other
df['has_pos_mark'] = True
df.loc[df['species_name_short'] == 'Other', 'has_pos_mark'] = False
df['species_name_long'] = df['mark_bits'].apply(
lambda mark_bits: ' '.join(
[f"{marker_name}{'+' if bit == '1' else '-'}" for marker_name, bit in zip(marker_names, mark_bits)]
)
)
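On toy data (marker and column names hypothetical), the two vectorized steps above behave as follows:

import pandas as pd

marker_names = ['VIM', 'ECAD']
df_markers = pd.DataFrame({'VIM': ['1', '0'], 'ECAD': ['1', '1']})

mark_bits = df_markers.astype(str).agg(''.join, axis=1)  # ['11', '01']
species_name_long = mark_bits.apply(
    lambda bits: ' '.join(f"{name}{'+' if bit == '1' else '-'}"
                          for name, bit in zip(marker_names, bits))
)  # ['VIM+ ECAD+', 'VIM- ECAD+']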

# # Add a column dropping the negative markers from these pretty names, e.g., 'ECAD+ COX2+'
# def species_name_long_to_short(species_name_long):
# x = '+ '.join([marker_names[iy] for iy, y in enumerate([x for x in species_name_long if x in ('+', '-')]) if y == '+']) + '+'
# species_name_short = x if len(x) != 1 else 'Other'
# return species_name_short
# # This can possibly be made faster (if it's correct) via the following, but I haven't tested it:
# # marker_indices = [i for i, x in enumerate(species_name_long) if x == '+']
# # if not marker_indices:
# # return 'Other'
# # return ' + '.join(marker_names[i] for i in marker_indices) + '+'
# df['species_name_short'] = df['species_name_long'].apply(species_name_long_to_short)
df['species_name_short'] = df['species_name_long'].str.extractall(r'(\w+)\+').groupby(level=0).agg(' + '.join).fillna('Other') + '+'

Copilot AI Oct 23, 2025


The new vectorized implementation for 'species_name_short' changes the output format compared to the commented-out function. The original function returns 'Other' for cases with no positive markers, but the new implementation returns 'Other+'. Additionally, cells with positive markers will get an extra '+' at the end. For example, 'ECAD+ COX2+' in the old version will become 'ECAD + COX2++' in the new version. Consider using: df['species_name_short'] = df['species_name_long'].str.extractall(r'(\w+)\+').groupby(level=0).agg(' + '.join).fillna('Other').apply(lambda x: x if x == 'Other' else x + '+')

Suggested change
df['species_name_short'] = df['species_name_long'].str.extractall(r'(\w+)\+').groupby(level=0).agg(' + '.join).fillna('Other') + '+'
df['species_name_short'] = df['species_name_long'].str.extractall(r'(\w+)\+').groupby(level=0).agg(' + '.join).fillna('Other').apply(lambda x: x if x == 'Other' else x + '+')

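The behavior Copilot flags is easy to reproduce on a toy series (values hypothetical); note that extractall also drops rows with no '+' match entirely, so an all-negative row only becomes 'Other' if the result is aligned back to the original index first:

import pandas as pd

s = pd.Series(['VIM+ ECAD+ COX2-', 'VIM- ECAD- COX2-'])
joined = s.str.extractall(r'(\w+)\+').groupby(level=0).agg(' + '.join)[0]
# joined has only index 0 ('VIM + ECAD'); index 1 is absent because
# extractall drops rows with no matches, so fillna never sees it.
short = joined.reindex(s.index).fillna('Other')
short = short.where(short == 'Other', short + '+')
# short -> ['VIM + ECAD+', 'Other']; the ' + ' join also differs from the
# original 'VIM+ ECAD+' style, as the comment above points out.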

# Create a new column called 'has pos mark' identifying which species_name_shorts are not "Other"
df['has_pos_mark'] = df['species_name_short'] != 'Other'

# Create phenotype column and assign a value of 'unassigned'
df['phenotype'] = 'unassigned'
@@ -143,36 +149,15 @@ def init_pheno_assign(df):
of each "exclusive" species
'''

st_init_species = time.time()
spec_summ = df[['species_name_short', 'phenotype', 'species_name_long']]
sp_init_species = time.time()
elapsed = round(sp_init_species - st_init_species, 3)
print(f' Initalizing Phenotying Assignments: {elapsed}s')

# This line seems to throw a TypeError: unhashable type: 'numpy.ndarray' error
spec_summ['species_count'] = spec_summ['species_name_short'].groupby(spec_summ['species_name_short']).transform('count')
spec_summ = spec_summ.drop_duplicates().reset_index(drop=True)
spec_summ = df[['species_name_short', 'phenotype', 'species_name_long']].copy()

# The above seems a bit inefficient and should probably be replaced with something like this:
# spec_summ = spec_summ['species_name_short'].value_counts().reset_index()
# spec_summ.columns = ['species_name_short', 'species_count']
species_counts = spec_summ['species_name_short'].value_counts().reset_index()
species_counts.columns = ['species_name_short', 'species_count']

sp_species_count = time.time()
elapsed_counts = round(sp_species_count - sp_init_species, 3)
print(f' Phenotying Assignments Counts Calculations: {elapsed_counts}s')
spec_summ = spec_summ.drop_duplicates(subset=['species_name_short']).merge(species_counts, on='species_name_short')
spec_summ['species_percent'] = (spec_summ['species_count'] / spec_summ['species_count'].sum() * 100).round(2)

spec_summ['species_percent'] = [round(100*x/sum(spec_summ['species_count']), 2) for x in spec_summ['species_count']]
sp_species_per = time.time()
elapsed_per = round(sp_species_per - sp_species_count, 3)
print(f' Phenotying Assignments Percents Calculations: {elapsed_per}s')

spec_summ = spec_summ.sort_values(by='species_count', ascending= False).reset_index(drop=True)
sp_species_sort = time.time()
elapsed_sort = round(sp_species_sort - sp_species_per, 3)
print(f' Phenotying Assignments sorting: {elapsed_sort}s')

# Return the created dataframe
return spec_summ
return spec_summ.sort_values(by='species_count', ascending=False).reset_index(drop=True)
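A small worked example of the value_counts/merge sequence above (species names hypothetical):

import pandas as pd

spec_summ = pd.DataFrame({
    'species_name_short': ['ECAD+', 'ECAD+', 'Other'],
    'phenotype': ['unassigned'] * 3,
    'species_name_long': ['VIM- ECAD+', 'VIM- ECAD+', 'VIM- ECAD-'],
})
species_counts = spec_summ['species_name_short'].value_counts().reset_index()
species_counts.columns = ['species_name_short', 'species_count']
out = (spec_summ.drop_duplicates(subset=['species_name_short'])
                .merge(species_counts, on='species_name_short'))
out['species_percent'] = (out['species_count'] / out['species_count'].sum() * 100).round(2)
# out -> 'ECAD+': count 2 (66.67%), 'Other': count 1 (33.33%)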

def init_pheno_summ(df):
'''For each unique species (elsewhere called "exclusive" phenotyping),
@@ -187,11 +172,21 @@ def init_pheno_summ(df):
each "exclusive" species
'''

assign_pheno = df[['phenotype', 'species_name_short', 'species_name_long']].groupby(by='phenotype', as_index = False).agg(lambda x: np.unique(list(x)))
# Group by phenotype and aggregate unique values for species_name_short and species_name_long
assign_pheno = df.groupby('phenotype', as_index=False).agg({
'species_name_short': lambda x: ', '.join(str(val) for val in pd.unique(x.dropna())),
'species_name_long': lambda x: ', '.join(str(val) for val in pd.unique(x.dropna()))
})

# Calculate phenotype counts and percentages
phenotype_counts = df['phenotype'].value_counts()
total_count = phenotype_counts.sum()

assign_pheno['phenotype_count'] = assign_pheno['phenotype'].map(phenotype_counts)
assign_pheno['phenotype_percent'] = (assign_pheno['phenotype_count'] / total_count * 100).round(2)

assign_pheno['phenotype_count'] = [sum(df['phenotype'] == x) for x in assign_pheno.phenotype]
assign_pheno['phenotype_percent'] = [round(100*x/sum(assign_pheno['phenotype_count']), 2) for x in assign_pheno['phenotype_count']]
assign_pheno = assign_pheno.sort_values(by='phenotype_count', ascending=False)
# Sort by phenotype count in descending order
assign_pheno = assign_pheno.sort_values(by='phenotype_count', ascending=False).reset_index(drop=True)

return assign_pheno

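A corresponding toy run of the groupby aggregation above (phenotype labels hypothetical):

import pandas as pd

df = pd.DataFrame({
    'phenotype': ['Tumor', 'Tumor', 'unassigned'],
    'species_name_short': ['ECAD+', 'ECAD+ COX2+', 'Other'],
    'species_name_long': ['ECAD+ COX2-', 'ECAD+ COX2+', 'ECAD- COX2-'],
})
assign_pheno = df.groupby('phenotype', as_index=False).agg({
    'species_name_short': lambda x: ', '.join(str(v) for v in pd.unique(x.dropna())),
    'species_name_long': lambda x: ', '.join(str(v) for v in pd.unique(x.dropna())),
})
phenotype_counts = df['phenotype'].value_counts()
assign_pheno['phenotype_count'] = assign_pheno['phenotype'].map(phenotype_counts)
assign_pheno['phenotype_percent'] = (assign_pheno['phenotype_count']
                                     / phenotype_counts.sum() * 100).round(2)
# 'Tumor' row -> 'ECAD+, ECAD+ COX2+', count 2, 66.67%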
18 changes: 13 additions & 5 deletions install_missing_packages.py
@@ -15,7 +15,9 @@ def is_mamba_installed():
'''
try:
# Run the 'mamba --version' command
result = subprocess.run(['mamba', '--version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
result = subprocess.run(['mamba', '--version'],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE, text=True, check=False)

# Check if the command was successful
if result.returncode == 0:
@@ -36,7 +38,9 @@ def install_with_mamba(packages):
print(f"&&&& Attempting to install {', '.join(packages)} with mamba.")
try:
# Run the 'mamba install <packages>' command
result = subprocess.run(['mamba', 'install', '-y'] + packages, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
result = subprocess.run(['mamba', 'install', '-y'] + packages,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE, text=True, check=False)

# Check if the command was successful
if result.returncode == 0:
@@ -56,7 +60,9 @@ def install_with_conda(packages):
print(f"&&&& Attempting to install {', '.join(packages)} with conda.")
try:
# Run the 'conda install <packages>' command
result = subprocess.run(['conda', 'install', '-y'] + packages, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
result = subprocess.run(['conda', 'install', '-y'] + packages,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE, text=True, check=False)

# Check if the command was successful
if result.returncode == 0:
@@ -75,7 +81,9 @@ def install_with_pip(packages):
print(f"&&&& Attempting to install {', '.join(packages)} with pip.")
try:
# Run the 'pip install <packages>' command
result = subprocess.run(['pip', 'install'] + packages, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
result = subprocess.run(['pip', 'install'] + packages,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE, text=True, check=False)

# Check if the command was successful
if result.returncode == 0:
@@ -91,7 +99,7 @@ def live_package_installation():
'''
Function to check for missing packages and install them
'''

# last two probably only needed for published dashboards
packages_to_install = ['hnswlib', 'parc', 'sklearn_ann', 'annoy', 'pyNNDescent']
installers_to_use = ['mamba', 'pip']
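The loop that consumes these lists is not shown in this diff; a plausible sketch of the fallback chain, assuming (unverified from the visible hunks) that the install_with_* helpers return True on success, would be:

# Hypothetical fallback: try each installer in order until one succeeds.
for installer in installers_to_use:
    if installer == 'mamba' and is_mamba_installed():
        if install_with_mamba(packages_to_install):
            break
    elif installer == 'pip':
        if install_with_pip(packages_to_install):
            break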
5 changes: 1 addition & 4 deletions nidap_dashboard_lib.py
@@ -14,12 +14,10 @@
alt.data_transformers.disable_max_rows()
from natsort import natsorted
from pathlib import Path
from datetime import datetime
import basic_phenotyper_lib as bpl # Useful functions for cell phenotyping
from foundry_IO_lib import foundry_IO_lib # Foundry Input/Output Class
from benchmark_collector import benchmark_collector # Benchmark Collector Class
from neighborhood_profiles import NeighborhoodProfiles, UMAPDensityProcessing # slow because this imports umap
import PlottingTools as umPT

def identify_col_type(col):
'''
@@ -327,9 +325,8 @@ def set_phenotyping_elements(session_state, df_orig):
if hasattr(session_state, 'dataeditor__do_not_persist'):
delattr(session_state, 'dataeditor__do_not_persist')

# Initalize Phenotyping Settings (Radio BUttons)
# Initialize Phenotyping Settings (Radio Buttons)
session_state.noPhenoOpt = 'Not Selected'
session_state.phenoMeth = 'Species' # Default when first loaded
session_state.selected_phenoMeth = session_state.noPhenoOpt # Default when first loaded

return session_state