Skip to content

Commit

Permalink
Merge pull request #43 from jolespin/devel
Browse files: browse the repository at this point in the history
Fixed error with unnecessary argument in compile_eukaryotic_classifications.py script
  • Loading branch information
jolespin authored Dec 28, 2023
2 parents c2c003b + 4be1d53 commit 3ed2910
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 6 deletions.
Binary file modified images/Schematic.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 2 additions & 1 deletion src/classify-eukaryotic.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ def get_hmmsearch_cmd( input_filepaths, output_filepaths, output_directory, dire
"-t {}".format(os.path.join(directories["intermediate"], "identifier_mapping.metaeuk.tsv")),
"--drop_duplicates",
"--index_column 4",
"--index_name id_gene",
">",
os.path.join(directories["intermediate"], "identifier_mapping.metaeuk.score_filtered.tsv"),
]
Expand Down Expand Up @@ -673,7 +674,7 @@ def main(args=None):
if opts.veba_database is None:
assert "VEBA_DATABASE" in os.environ, "Please set the following environment variable 'export VEBA_DATABASE=/path/to/veba_database' or provide path to --veba_database"
opts.veba_database = os.environ["VEBA_DATABASE"]
opts.eukaryotic_database = os.path.join(opts.veba_database, "Classify", "Microeukaryotic")
opts.eukaryotic_database = os.path.join(opts.veba_database, "Classify", "MicroEuk")
opts.hmms = os.path.join(opts.veba_database, "MarkerSets", "eukaryota_odb10.hmm.gz")
opts.scores_cutoff = os.path.join(opts.veba_database, "MarkerSets", "eukaryota_odb10.scores_cutoff.tsv.gz")

Expand Down
8 changes: 4 additions & 4 deletions src/scripts/compile_eukaryotic_classifications.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def main(args=None):
parser.add_argument("--header", type=int, default=1, help="Include header in output {0=No, 1=Yes) [Default: 1]")
parser.add_argument("--debug", action="store_true")
parser.add_argument("--remove_genes_with_missing_values", action="store_true")
parser.add_argument("--use_original_metaeuk_gene_identifiers", action="store_true")
# parser.add_argument("--use_original_metaeuk_gene_identifiers", action="store_true")

# Options
opts = parser.parse_args()
Expand Down Expand Up @@ -164,9 +164,9 @@ def main(args=None):
if opts.remove_genes_with_missing_values:
df_gene_classifications = df_gene_classifications.dropna(how="any", axis=0)

if not opts.use_original_metaeuk_gene_identifiers:
metaeuk_to_gene = df_metaeuk["gene_id"].to_dict()
df_gene_classifications.index = df_gene_classifications.index.map(lambda x: metaeuk_to_gene[x])
# if not opts.use_original_metaeuk_gene_identifiers: #!
# gene_to_header = df_metaeuk["MetaEuk_header"]
# df_gene_classifications.index = df_gene_classifications.index.map(lambda x: gene_to_header[x])

df_gene_classifications.to_csv(opts.output, sep="\t", header=bool(opts.header))

Expand Down
6 changes: 5 additions & 1 deletion src/scripts/subset_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

# from tqdm import tqdm
__program__ = os.path.split(sys.argv[0])[-1]
__version__ = "2023.3.14"
__version__ = "2023.12.28"

#
def main(args=None):
Expand All @@ -28,6 +28,8 @@ def main(args=None):
parser.add_argument("-a","--axis", type=int, default=0, help = "index:axis=0, columns:axis=1")
# parser.add_argument("--column", type=int, help = "Column to look for index")
parser.add_argument("--index_column", type=int, default=0, help = "Index column [Default: 0]")
parser.add_argument("--index_name", type=str, help = "Add index name")

parser.add_argument("-d", "--drop_duplicates", action="store_true", help = "Drop duplicates")

parser.add_argument("--sep", type=str, default="\t", help = "Separator [Default: <tab>]")
Expand Down Expand Up @@ -83,6 +85,8 @@ def main(args=None):
df = df.drop(index, axis=1)

# Write table
if opts.index_name:
df.index.name = opts.index_name
df.to_csv(opts.output_table, sep=opts.sep, header=not opts.no_header)

if __name__ == "__main__":
Expand Down

0 comments on commit 3ed2910

Please sign in to comment.