-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from earmingol/dev
Update to v0.3.0
- Loading branch information
Showing
44 changed files
with
4,111 additions
and
3,155 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
from .gene_info import (retrieve_ensembl2symbol_data) | ||
from .gene_info import (retrieve_ensembl2symbol_data) | ||
from .database import (load_sccellfie_database) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
import pandas as pd | ||
import os | ||
|
||
|
||
def load_sccellfie_database(organism='human', task_folder=None, rxn_info_filename=None, task_info_filename=None,
                            task_by_rxn_filename=None, task_by_gene_filename=None, rxn_by_gene_filename=None,
                            thresholds_filename=None):
    """
    Loads files of the metabolic task database from either a local folder, individual file paths, or predefined URLs.

    Parameters
    ----------
    organism : str, optional (default: 'human')
        The organism to retrieve data for. Choose 'human' or 'mouse' (case-insensitive).
        It selects both the default URL and the default file names.

    task_folder : str, optional (default: None)
        The local folder path containing CellFie data files. If provided, this takes priority:
        the organism's default file names are looked up inside this folder and the
        individual ``*_filename`` arguments are ignored.

    rxn_info_filename : str, optional (default: None)
        Full path for reaction information JSON file.

    task_info_filename : str, optional (default: None)
        Full path for task information CSV file.

    task_by_rxn_filename : str, optional (default: None)
        Full path for task by reaction CSV file.

    task_by_gene_filename : str, optional (default: None)
        Full path for task by gene CSV file.

    rxn_by_gene_filename : str, optional (default: None)
        Full path for reaction by gene CSV file.

    thresholds_filename : str, optional (default: None)
        Full path for thresholds CSV file.

    Returns
    -------
    data : dict
        A dictionary containing the loaded data frames and information.
        Keys are 'rxn_info', 'task_info', 'task_by_rxn', 'task_by_gene', 'rxn_by_gene',
        'thresholds', and 'organism'. A file that could not be loaded is reported with
        a printed message and its entry is set to None. 'organism' holds the
        lower-cased organism name.
        Examples of dataframes can be found at https://github.com/earmingol/scCellFie/raw/refs/heads/main/task_data/homo_sapiens/

    Raises
    ------
    ValueError
        If `organism` is not 'human' or 'mouse'.
    """
    # Define default URLs for human and mouse data
    default_urls = {
        'human': 'https://github.com/earmingol/scCellFie/raw/refs/heads/main/task_data/homo_sapiens/',
        'mouse': 'https://github.com/earmingol/scCellFie/raw/refs/heads/main/task_data/mus_musculus/'
    }

    # Define default file names
    default_file_names = {
        'human': {
            'rxn_info': 'Rxn-Info-Recon2-2.json',
            'task_info': 'Task-Info.csv',
            'task_by_rxn': 'Task_by_Rxn.csv',
            'task_by_gene': 'Task_by_Gene.csv',
            'rxn_by_gene': 'Rxn_by_Gene.csv',
            'thresholds': 'Thresholds.csv'
        },
        'mouse': {
            'rxn_info': 'Rxn-Info-iMM1415.json',
            'task_info': 'Task-Info.csv',
            'task_by_rxn': 'Task_by_Rxn.csv',
            'task_by_gene': 'Task_by_Gene.csv',
            'rxn_by_gene': 'Rxn_by_Gene.csv',
            'thresholds': 'Thresholds.csv'
        }
    }

    # Normalise once so the URL lookup and the file-name lookup agree, and
    # validate up front so both branches fail with the same ValueError.
    organism_key = organism.lower() if isinstance(organism, str) else None
    if organism_key not in default_file_names:
        raise ValueError(f"Invalid organism: {organism!r}. Choose 'human' or 'mouse'.")
    file_names = default_file_names[organism_key]

    # Determine the location of every file
    if task_folder:
        file_paths = {key: os.path.join(task_folder, name) for key, name in file_names.items()}
    else:
        overrides = {
            'rxn_info': rxn_info_filename,
            'task_info': task_info_filename,
            'task_by_rxn': task_by_rxn_filename,
            'task_by_gene': task_by_gene_filename,
            'rxn_by_gene': rxn_by_gene_filename,
            'thresholds': thresholds_filename
        }
        # The default URLs already end with '/'; strip it to avoid '//' in the final URL.
        base_url = default_urls[organism_key].rstrip('/')
        file_paths = {key: overrides[key] or f"{base_url}/{name}" for key, name in file_names.items()}

    # Function to load a file
    def load_file(file_key, index_col=None):
        full_path = file_paths[file_key]
        try:
            # str() lets os.PathLike overrides through; lower() accepts '.CSV' / '.JSON'.
            extension = os.path.splitext(str(full_path))[1].lower()
            if extension == '.json':
                return pd.read_json(full_path)
            elif extension == '.csv':
                return pd.read_csv(full_path, index_col=index_col)
            else:
                raise ValueError(f"Unsupported file format: {full_path}")
        except Exception as e:
            # Best-effort loading: report the failure and keep going so the
            # remaining files are still returned.
            print(f"Error loading {full_path}: {str(e)}")
            return None

    # Load all files
    data = {}
    data['rxn_info'] = load_file('rxn_info')
    data['task_info'] = load_file('task_info')
    data['task_by_rxn'] = load_file('task_by_rxn', index_col='Task')
    data['task_by_gene'] = load_file('task_by_gene', index_col='Task')
    data['rxn_by_gene'] = load_file('rxn_by_gene', index_col='Reaction')
    data['thresholds'] = load_file('thresholds', index_col='symbol')
    data['organism'] = organism_key
    return data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
import pytest | ||
import os | ||
import tempfile | ||
import pandas as pd | ||
|
||
from unittest.mock import patch | ||
|
||
from sccellfie.datasets.database import load_sccellfie_database # Replace 'your_module' with the actual module name | ||
|
||
# Mock data for testing: canned frames handed back by the patched
# pd.read_json / pd.read_csv fixtures below instead of reading real files.
mock_json_data = pd.DataFrame({'col1': [1, 2, 3], 'col2': ['a', 'b', 'c']})
mock_csv_data = pd.DataFrame({'col1': [4, 5, 6], 'col2': ['d', 'e', 'f']})
|
||
|
||
@pytest.fixture
def mock_read_json(monkeypatch):
    """Replace ``pd.read_json`` with a stub that always returns ``mock_json_data``."""
    monkeypatch.setattr(pd, 'read_json', lambda path: mock_json_data)
|
||
|
||
@pytest.fixture
def mock_read_csv(monkeypatch):
    """Replace ``pd.read_csv`` with a stub that always returns ``mock_csv_data``."""
    monkeypatch.setattr(pd, 'read_csv', lambda path, index_col=None: mock_csv_data)
|
||
|
||
def test_load_sccellfie_database_default_urls(mock_read_json, mock_read_csv):
    """Loading with only an organism returns every table, served by the patched readers."""
    data = load_sccellfie_database(organism='human')

    assert isinstance(data, dict)
    expected_keys = ('rxn_info', 'task_info', 'task_by_rxn', 'task_by_gene', 'rxn_by_gene', 'thresholds')
    for key in expected_keys:
        assert key in data
    assert data['organism'] == 'human'

    # The JSON table comes from the read_json stub, the CSV tables from the read_csv stub.
    assert data['rxn_info'].equals(mock_json_data)
    assert data['task_info'].equals(mock_csv_data)
|
||
|
||
def test_load_sccellfie_database_local_folder():
    """Loading from ``task_folder`` returns a dict with every expected key."""
    csv_names = ('Task-Info.csv', 'Task_by_Rxn.csv', 'Task_by_Gene.csv', 'Rxn_by_Gene.csv', 'Thresholds.csv')
    expected_keys = ('rxn_info', 'task_info', 'task_by_rxn', 'task_by_gene', 'rxn_by_gene', 'thresholds')

    with tempfile.TemporaryDirectory() as tmpdirname:
        # Create mock files (empty frames; only the presence of the keys is checked below)
        pd.DataFrame().to_json(os.path.join(tmpdirname, 'Rxn-Info-Recon2-2.json'))
        for csv_name in csv_names:
            pd.DataFrame().to_csv(os.path.join(tmpdirname, csv_name))

        data = load_sccellfie_database(organism='human', task_folder=tmpdirname)

        assert isinstance(data, dict)
        for key in expected_keys:
            assert key in data
        assert data['organism'] == 'human'
|
||
|
||
def test_load_sccellfie_database_individual_files():
    """Loading from explicitly named files returns a dict with every expected key."""
    custom_names = {
        'rxn_info_filename': 'custom_rxn_info.json',
        'task_info_filename': 'custom_task_info.csv',
        'task_by_rxn_filename': 'custom_task_by_rxn.csv',
        'task_by_gene_filename': 'custom_task_by_gene.csv',
        'rxn_by_gene_filename': 'custom_rxn_by_gene.csv',
        'thresholds_filename': 'custom_thresholds.csv',
    }
    expected_keys = ('rxn_info', 'task_info', 'task_by_rxn', 'task_by_gene', 'rxn_by_gene', 'thresholds')

    with tempfile.TemporaryDirectory() as tmpdirname:
        # Create mock files with unique names (empty frames written in the matching format)
        custom_paths = {arg: os.path.join(tmpdirname, name) for arg, name in custom_names.items()}
        for path in custom_paths.values():
            empty = pd.DataFrame()
            if path.endswith('.json'):
                empty.to_json(path)
            else:
                empty.to_csv(path)

        data = load_sccellfie_database(organism='human', **custom_paths)

        assert isinstance(data, dict)
        for key in expected_keys:
            assert key in data
        assert data['organism'] == 'human'
|
||
|
||
def test_load_sccellfie_database_invalid_organism():
    """An unknown organism (with no folder given) is rejected with ValueError."""
    pytest.raises(ValueError, load_sccellfie_database, organism='invalid')
|
||
|
||
@patch('pandas.read_json')
@patch('pandas.read_csv')
def test_load_sccellfie_database_file_error(mock_read_csv, mock_read_json):
    """When both pandas readers raise, every table entry is None and no error propagates."""
    # Decorators apply bottom-up, so the read_csv mock is the first argument.
    mock_read_csv.side_effect = Exception("Mock CSV read error")
    mock_read_json.side_effect = Exception("Mock JSON read error")

    data = load_sccellfie_database(organism='human')

    assert isinstance(data, dict)
    tables = [value for key, value in data.items() if key != 'organism']
    assert all(table is None for table in tables)
    assert data['organism'] == 'human'
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.