From 7abbb1456dbb6725c451d16691078c7310885a1d Mon Sep 17 00:00:00 2001
From: darcy220606 <ananhamido@hotmail.com>
Date: Fri, 10 Jan 2025 09:17:10 +0100
Subject: [PATCH 01/11] fix merge taxonomy scripts and outputs

---
 bin/merge_taxonomy.py | 137 ++++++++++++++++++++++++++++++------------
 1 file changed, 98 insertions(+), 39 deletions(-)

diff --git a/bin/merge_taxonomy.py b/bin/merge_taxonomy.py
index 44eed31a..d492e4b4 100755
--- a/bin/merge_taxonomy.py
+++ b/bin/merge_taxonomy.py
@@ -3,7 +3,7 @@
 # Written by Anan Ibrahim and released under the MIT license.
 # See git repository (https://github.com/Darcy220606/AMPcombi) for full license text.
 # Date: March 2024
-# Version: 0.1.0
+# Version: 0.1.1
 
 # Required modules
 import sys
@@ -12,7 +12,7 @@
 import numpy as np
 import argparse
 
-tool_version = "0.1.0"
+tool_version = "0.1.1"
 #########################################
 # TOP LEVEL: AMPCOMBI
 #########################################
@@ -66,6 +66,15 @@
 # TAXONOMY
 #########################################
 def reformat_mmseqs_taxonomy(mmseqs_taxonomy):
+    """
+    Reformats the taxonomy files and joins them in a list to be passed on to the tool functions
+
+    Args:
+        mmseqs_taxonomy (tsv): mmseqs output file per sample
+
+    Returns:
+        data frame: reformated tables
+    """
     mmseqs2_df = pd.read_csv(mmseqs_taxonomy, sep='\t', header=None, names=['contig_id', 'taxid', 'rank_label', 'scientific_name', 'lineage', 'mmseqs_lineage_contig'])
     # remove the lineage column
     mmseqs2_df.drop('lineage', axis=1, inplace=True)
@@ -85,7 +94,19 @@ def reformat_mmseqs_taxonomy(mmseqs_taxonomy):
 # FUNCTION: AMPCOMBI
 #########################################
 def ampcombi_taxa(args):
-    merged_df = pd.DataFrame()
+    """
+    Merges AMPcombi tool output with taxonomy information.
+
+    Parameters:
+    ----------
+    args:
+        Contains arguments for AMPcombi file path (`amp`) and list of taxonomy file paths (`taxa1`).
+
+    Outputs:
+    -------
+    Creates a file named `ampcombi_complete_summary_taxonomy.tsv` containing the merged results.
+    """
+    combined_dfs = []
 
     # assign input args to variables
     ampcombi = args.amp
@@ -100,13 +121,6 @@ def ampcombi_taxa(args):
 
     # filter the tool df
     tool_df = pd.read_csv(ampcombi, sep='\t')
-    # remove the column with contig_id - duplicate #NOTE: will be fixed in AMPcombi v2.0.0
-    tool_df = tool_df.drop('contig_id', axis=1)
-    # make sure 1st and 2nd column have the same column labels
-    tool_df.rename(columns={tool_df.columns[0]: 'sample_id'}, inplace=True)
-    tool_df.rename(columns={tool_df.columns[1]: 'contig_id'}, inplace=True)
-    # grab the real contig id in another column copy for merging
-    tool_df['contig_id_merge'] = tool_df['contig_id'].str.rsplit('_', 1).str[0]
 
     # merge rows from taxa to ampcombi_df based on substring match in sample_id
     # grab the unique sample names from the taxonomy table
@@ -114,17 +128,18 @@ def ampcombi_taxa(args):
     # for every sampleID in taxadf merge the results
     for sampleID in samples_taxa:
         # subset ampcombi
-        subset_tool = tool_df.loc[tool_df['sample_id'].str.contains(sampleID)]
+        subset_tool = tool_df[tool_df['sample_id'].str.contains(sampleID, na=False)]
         # subset taxa
-        subset_taxa = taxa_df.loc[taxa_df['sample_id'].str.contains(sampleID)]
+        subset_taxa = taxa_df[taxa_df['sample_id'].str.contains(sampleID, na=False)]
         # merge
-        subset_df = pd.merge(subset_tool, subset_taxa, left_on = 'contig_id_merge', right_on='contig_id', how='left')
+        subset_df = pd.merge(subset_tool, subset_taxa, on='contig_id', how='left')
         # cleanup the table
-        columnsremove = ['contig_id_merge','contig_id_y', 'sample_id_y']
+        columnsremove = ['sample_id_y']
         subset_df.drop(columnsremove, axis=1, inplace=True)
-        subset_df.rename(columns={'contig_id_x': 'contig_id', 'sample_id_x':'sample_id'},inplace=True)
+        subset_df.rename(columns={'sample_id_x':'sample_id'},inplace=True)
         # append in the combined_df
-        merged_df = merged_df.append(subset_df, ignore_index=True)
+        combined_dfs.append(subset_df)
+    merged_df = pd.concat(combined_dfs, ignore_index=True)
 
     # write to file
     merged_df.to_csv('ampcombi_complete_summary_taxonomy.tsv', sep='\t', index=False)
@@ -133,7 +148,20 @@ def ampcombi_taxa(args):
 # FUNCTION: COMBGC
 #########################################
 def combgc_taxa(args):
-    merged_df = pd.DataFrame()
+    """
+    Merges comBGC tool output with taxonomy information.
+    Tool rows are left-joined to the taxonomy rows on `contig_id`, one sample at a time.
+
+    Parameters:
+    ----------
+    args:
+        Contains arguments for comBGC file path (`bgc`) and list of taxonomy file paths (`taxa2`).
+
+    Outputs:
+    -------
+    Creates a file named `combgc_complete_summary_taxonomy.tsv` containing the merged results.
+    """
+    combined_dfs = []
 
     # assign input args to variables
     combgc = args.bgc
@@ -152,23 +180,24 @@ def combgc_taxa(args):
     tool_df.rename(columns={tool_df.columns[0]: 'sample_id'}, inplace=True)
     tool_df.rename(columns={tool_df.columns[1]: 'contig_id'}, inplace=True)
 
-    # merge rows from taxa to ampcombi_df based on substring match in sample_id
+    # merge rows from taxa to combgc_df based on substring match in sample_id
     # grab the unique sample names from the taxonomy table
     samples_taxa = taxa_df['sample_id'].unique()
     # for every sampleID in taxadf merge the results
     for sampleID in samples_taxa:
-        # subset ampcombi
-        subset_tool = tool_df.loc[tool_df['sample_id'].str.contains(sampleID)]
+        # subset tool
+        subset_tool = tool_df[tool_df['sample_id'].str.contains(sampleID, na=False)]
         # subset taxa
-        subset_taxa = taxa_df.loc[taxa_df['sample_id'].str.contains(sampleID)]
+        subset_taxa = taxa_df[taxa_df['sample_id'].str.contains(sampleID, na=False)]
         # merge
-        subset_df = pd.merge(subset_tool, subset_taxa, left_on = 'contig_id', right_on='contig_id', how='left')
+        subset_df = pd.merge(subset_tool, subset_taxa, on='contig_id', how='left')
         # cleanup the table
         columnsremove = ['sample_id_y']
         subset_df.drop(columnsremove, axis=1, inplace=True)
         subset_df.rename(columns={'sample_id_x':'sample_id'},inplace=True)
         # append in the combined_df
-        merged_df = merged_df.append(subset_df, ignore_index=True)
+        combined_dfs.append(subset_df)
+    merged_df = pd.concat(combined_dfs, ignore_index=True)
 
     # write to file
     merged_df.to_csv('combgc_complete_summary_taxonomy.tsv', sep='\t', index=False)
@@ -176,8 +205,21 @@ def combgc_taxa(args):
 #########################################
 # FUNCTION: HAMRONIZATION
 #########################################
+# TODO : FIX THE MERGING in ARG pipeline
 def hamronization_taxa(args):
-    merged_df = pd.DataFrame()
+    """_summary_
+    Merges Hamronization tool output with taxonomy information.
+
+    Parameters:
+    ----------
+    args:
+        Contains arguments for hamronization file path (`arg`) and list of taxonomy file paths (`taxa2`).
+
+    Outputs:
+    -------
+    Creates a file named `hamronization_complete_summary_taxonomy.tsv` containing the merged results.
+    """
+    combined_dfs = []
 
     # assign input args to variables
     hamronization = args.arg
@@ -197,29 +239,46 @@ def hamronization_taxa(args):
     # reorder the columns
     new_order = ['sample_id', 'contig_id'] + [col for col in tool_df.columns if col not in ['sample_id', 'contig_id']]
     tool_df = tool_df.reindex(columns=new_order)
-    # grab the real contig id in another column copy for merging
-    tool_df['contig_id_merge'] = tool_df['contig_id'].str.rsplit('_', 1).str[0]
 
-    # merge rows from taxa to ampcombi_df based on substring match in sample_id
+    # merge rows from taxa to hamronization_df based on substring match in sample_id
     # grab the unique sample names from the taxonomy table
     samples_taxa = taxa_df['sample_id'].unique()
     # for every sampleID in taxadf merge the results
     for sampleID in samples_taxa:
-        # subset ampcombi
-        subset_tool = tool_df.loc[tool_df['sample_id'].str.contains(sampleID)]
+        # subset tool
+        subset_tool = tool_df[tool_df['sample_id'].str.contains(sampleID, na=False)]
         # subset taxa
-        subset_taxa = taxa_df.loc[taxa_df['sample_id'].str.contains(sampleID)]
-        # merge
-        subset_df = pd.merge(subset_tool, subset_taxa, left_on = 'contig_id_merge', right_on='contig_id', how='left')
-        # cleanup the table
-        columnsremove = ['contig_id_merge','contig_id_y', 'sample_id_y']
-        subset_df.drop(columnsremove, axis=1, inplace=True)
-        subset_df.rename(columns={'contig_id_x': 'contig_id', 'sample_id_x':'sample_id'},inplace=True)
-        # append in the combined_df
-        merged_df = merged_df.append(subset_df, ignore_index=True)
+        subset_taxa = taxa_df[taxa_df['sample_id'].str.contains(sampleID, na=False)]
+        # ensure strings
+        subset_tool['contig_id'] = subset_tool['contig_id'].astype(str)
+        subset_taxa['contig_id'] = subset_taxa['contig_id'].astype(str)
+        # rename columns to avoid droping of mutual ones
+        rename_dict = {col: f"{col}_taxa" for col in subset_taxa.columns if col in subset_tool.columns}
+        subset_taxa = subset_taxa.rename(columns=rename_dict)
+
+        # merge by string
+        merged_rows = []
+        # iterate and find all matches
+        for _, tool_row in subset_tool.iterrows():
+            tool_contig_id = tool_row['contig_id']
+            matches = subset_taxa[subset_taxa['contig_id_taxa'].apply(lambda x: str(x) in tool_contig_id)]
+            # if match, merge row
+            if not matches.empty:
+                for _, taxa_row in matches.iterrows():
+                    merged_row = {**tool_row.to_dict(), **taxa_row.to_dict()}
+                    merged_rows.append(merged_row)
+            else:
+                # if no match keep row as is
+                merged_row = {**tool_row.to_dict()}
+                merged_rows.append(merged_row)
+
+        merged_df = pd.DataFrame(merged_rows)
+        combined_dfs.append(merged_df)
+
+    merged_df_final = pd.concat(combined_dfs, ignore_index=True)
 
     # write to file
-    merged_df.to_csv('hamronization_complete_summary_taxonomy.tsv', sep='\t', index=False)
+    merged_df_final.to_csv('hamronization_complete_summary_taxonomy.tsv', sep='\t', index=False)
 
 #########################################
 # SUBPARSERS: DEFAULT

From e7630e81549c84366369f22de50b0d2339040beb Mon Sep 17 00:00:00 2001
From: darcy220606 <ananhamido@hotmail.com>
Date: Fri, 10 Jan 2025 09:18:55 +0100
Subject: [PATCH 02/11] fix ARG TABIX output file name

---
 conf/modules.config | 1 +
 1 file changed, 1 insertion(+)

diff --git a/conf/modules.config b/conf/modules.config
index d4e473d2..7999b58a 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -534,6 +534,7 @@ process {
     }
 
     withName: ARG_TABIX_BGZIP {
+        ext.prefix = { "hamronization_complete_summary_taxonomy" }
         publishDir = [
             path: { "${params.outdir}/reports/hamronization_summarize" },
             mode: params.publish_dir_mode,

From 681906c42e4e934060183cbf62ffc1d181c34421 Mon Sep 17 00:00:00 2001
From: darcy220606 <ananhamido@hotmail.com>
Date: Fri, 10 Jan 2025 09:31:03 +0100
Subject: [PATCH 03/11] fix output declaration for args

---
 docs/output.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/output.md b/docs/output.md
index 0920236c..686ef2dc 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -522,7 +522,7 @@ Note that filtered FASTA is only used for BGC workflow for run-time optimisation
 - `hamronization_summarize/` one of the following:
   - `hamronization_combined_report.json`: summarised output in .json format
   - `hamronization_combined_report.tsv`: summarised output in .tsv format when the taxonomic classification is turned off (pipeline default).
-  - `hamronization_combined_report.tsv.gz`: summarised output in gzipped format when the taxonomic classification is turned on by `--run_taxa_classification`.
+  - `hamronization_complete_summary_taxonomy.tsv.gz`: summarised output in gzipped format when the taxonomic classification is turned on by `--run_taxa_classification`.
   - `hamronization_combined_report.html`: interactive output in .html format
 
 </details>

From a039619bcac5a901df190885f92c693f5ea0ccbd Mon Sep 17 00:00:00 2001
From: darcy220606 <ananhamido@hotmail.com>
Date: Fri, 10 Jan 2025 09:39:08 +0100
Subject: [PATCH 04/11] update CHANGELOG

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a27c12d3..9227c5b1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - [#427](https://github.com/nf-core/funcscan/pull/427) Fixed the AMP reference database issues reported by users, due to non-ASCII characters. (by @darcy220606)
 - [#430](https://github.com/nf-core/funcscan/pull/430) Updated `rgi/main` module to fix incorrect variable name. (by @amizeranschi and @jasmezz)
+- [#435](https://github.com/nf-core/funcscan/pull/435) Fixed dependency errors within taxonomy merging scripts and updated the code and output for all three workflows. Bumped to version 0.1.1. (by @darcy220606)
 
 ### `Dependencies`
 
@@ -24,6 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 | Macrel   | 1.2.0            | 1.4.0       |
 | MultiQC  | 1.24.0           | 1.25.1      |
 
+
 ### `Deprecated`
 
 ## v2.0.0 - [2024-09-05]

From 35423f435200889ae2ccb4fab1b2bde9b767c1be Mon Sep 17 00:00:00 2001
From: darcy220606 <ananhamido@hotmail.com>
Date: Fri, 10 Jan 2025 10:03:17 +0100
Subject: [PATCH 05/11] fix linting

---
 CHANGELOG.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9227c5b1..84df40b8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -25,7 +25,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 | Macrel   | 1.2.0            | 1.4.0       |
 | MultiQC  | 1.24.0           | 1.25.1      |
 
-
 ### `Deprecated`
 
 ## v2.0.0 - [2024-09-05]

From 11a07ca24739d830d63014a08ce2df2a8db042c2 Mon Sep 17 00:00:00 2001
From: darcy220606 <ananhamido@hotmail.com>
Date: Fri, 10 Jan 2025 11:28:24 +0100
Subject: [PATCH 06/11] update nf tests

---
 tests/test_taxonomy_bakta.nf.test     | 2 +-
 tests/test_taxonomy_prokka.nf.test    | 2 +-
 tests/test_taxonomy_pyrodigal.nf.test | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/test_taxonomy_bakta.nf.test b/tests/test_taxonomy_bakta.nf.test
index 6498c4bd..5f076d29 100644
--- a/tests/test_taxonomy_bakta.nf.test
+++ b/tests/test_taxonomy_bakta.nf.test
@@ -79,7 +79,7 @@ nextflow_pipeline {
                 ).match("fargene") },
 
                 // hAMRonization
-                { assert new File("$outputDir/reports/hamronization_summarize/hamronization_combined_report.tsv.gz").exists() },
+                { assert new File("$outputDir/reports/hamronization_summarize/hamronization_complete_summary_taxonomy.tsv.gz").exists() },
 
                 // antiSMASH
                 { assert snapshot (
diff --git a/tests/test_taxonomy_prokka.nf.test b/tests/test_taxonomy_prokka.nf.test
index 0628508a..64c67b4a 100644
--- a/tests/test_taxonomy_prokka.nf.test
+++ b/tests/test_taxonomy_prokka.nf.test
@@ -79,7 +79,7 @@ nextflow_pipeline {
                 ).match("fargene") },
 
                 // hAMRonization
-                { assert new File("$outputDir/reports/hamronization_summarize/hamronization_combined_report.tsv.gz").exists() },
+                { assert new File("$outputDir/reports/hamronization_summarize/hamronization_complete_summary_taxonomy.tsv.gz").exists() },
 
                 // antiSMASH
                 { assert snapshot (
diff --git a/tests/test_taxonomy_pyrodigal.nf.test b/tests/test_taxonomy_pyrodigal.nf.test
index 8f325fc0..f0dc1012 100644
--- a/tests/test_taxonomy_pyrodigal.nf.test
+++ b/tests/test_taxonomy_pyrodigal.nf.test
@@ -79,7 +79,7 @@ nextflow_pipeline {
                 ).match("fargene") },
 
                 // hAMRonization
-                { assert new File("$outputDir/reports/hamronization_summarize/hamronization_combined_report.tsv.gz").exists() },
+                { assert new File("$outputDir/reports/hamronization_summarize/hamronization_complete_summary_taxonomy.tsv.gz").exists() },
 
                 // antiSMASH
                 { assert snapshot (

From 0e1e426972b430693431adc56d6a3c753bb9d741 Mon Sep 17 00:00:00 2001
From: Anan Ibrahim <81744003+Darcy220606@users.noreply.github.com>
Date: Wed, 15 Jan 2025 13:27:35 +0100
Subject: [PATCH 07/11] Update bin/merge_taxonomy.py

Co-authored-by: Jasmin Frangenberg <73216762+jasmezz@users.noreply.github.com>
---
 bin/merge_taxonomy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bin/merge_taxonomy.py b/bin/merge_taxonomy.py
index d492e4b4..b3a4aa6b 100755
--- a/bin/merge_taxonomy.py
+++ b/bin/merge_taxonomy.py
@@ -73,7 +73,7 @@ def reformat_mmseqs_taxonomy(mmseqs_taxonomy):
         mmseqs_taxonomy (tsv): mmseqs output file per sample
 
     Returns:
-        data frame: reformated tables
+        data frame: reformatted tables
     """
     mmseqs2_df = pd.read_csv(mmseqs_taxonomy, sep='\t', header=None, names=['contig_id', 'taxid', 'rank_label', 'scientific_name', 'lineage', 'mmseqs_lineage_contig'])
     # remove the lineage column

From 8101fab9ba40054c53938f07a5dab3301034ad6b Mon Sep 17 00:00:00 2001
From: Anan Ibrahim <81744003+Darcy220606@users.noreply.github.com>
Date: Wed, 15 Jan 2025 13:27:52 +0100
Subject: [PATCH 08/11] Update bin/merge_taxonomy.py

Co-authored-by: Jasmin Frangenberg <73216762+jasmezz@users.noreply.github.com>
---
 bin/merge_taxonomy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bin/merge_taxonomy.py b/bin/merge_taxonomy.py
index b3a4aa6b..49a9a970 100755
--- a/bin/merge_taxonomy.py
+++ b/bin/merge_taxonomy.py
@@ -208,7 +208,7 @@ def combgc_taxa(args):
 # TODO : FIX THE MERGING in ARG pipeline
 def hamronization_taxa(args):
     """_summary_
-    Merges Hamronization tool output with taxonomy information.
+    Merges hAMRonization tool output with taxonomy information.
 
     Parameters:
     ----------

From 727215308a2ccc18099eb7f66993c01c7c1e6eee Mon Sep 17 00:00:00 2001
From: Anan Ibrahim <81744003+Darcy220606@users.noreply.github.com>
Date: Wed, 15 Jan 2025 13:28:15 +0100
Subject: [PATCH 09/11] Update CHANGELOG.md

Co-authored-by: Jasmin Frangenberg <73216762+jasmezz@users.noreply.github.com>
---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 84df40b8..89c3fb0f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,7 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - [#427](https://github.com/nf-core/funcscan/pull/427) Fixed the AMP reference database issues reported by users, due to non-ASCII characters. (by @darcy220606)
 - [#430](https://github.com/nf-core/funcscan/pull/430) Updated `rgi/main` module to fix incorrect variable name. (by @amizeranschi and @jasmezz)
-- [#435](https://github.com/nf-core/funcscan/pull/435) Fixed dependency errors within taxonomy merging scripts and updated the code and output for all three workflows. Bumped to version 0.1.1. (by @darcy220606)
+- [#435](https://github.com/nf-core/funcscan/pull/435) Fixed dependency errors within taxonomy merging scripts, updated the code and output for all three workflows. Bumped to version 0.1.1. (by @darcy220606)
 
 ### `Dependencies`
 

From 10d9504425186f89aeffca2e33bbc2c6f7b0d990 Mon Sep 17 00:00:00 2001
From: Anan Ibrahim <81744003+Darcy220606@users.noreply.github.com>
Date: Wed, 15 Jan 2025 13:28:33 +0100
Subject: [PATCH 10/11] Update bin/merge_taxonomy.py

Co-authored-by: Jasmin Frangenberg <73216762+jasmezz@users.noreply.github.com>
---
 bin/merge_taxonomy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bin/merge_taxonomy.py b/bin/merge_taxonomy.py
index 49a9a970..ead506e5 100755
--- a/bin/merge_taxonomy.py
+++ b/bin/merge_taxonomy.py
@@ -252,7 +252,7 @@ def hamronization_taxa(args):
         # ensure strings
         subset_tool['contig_id'] = subset_tool['contig_id'].astype(str)
         subset_taxa['contig_id'] = subset_taxa['contig_id'].astype(str)
-        # rename columns to avoid droping of mutual ones
+        # rename columns to avoid dropping of mutual ones
         rename_dict = {col: f"{col}_taxa" for col in subset_taxa.columns if col in subset_tool.columns}
         subset_taxa = subset_taxa.rename(columns=rename_dict)
 

From eec773883aa97e0bc8871fc669d4bfc6a2c875a6 Mon Sep 17 00:00:00 2001
From: Anan Ibrahim <81744003+Darcy220606@users.noreply.github.com>
Date: Wed, 15 Jan 2025 13:29:26 +0100
Subject: [PATCH 11/11] Update merge_taxonomy.py

---
 bin/merge_taxonomy.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/bin/merge_taxonomy.py b/bin/merge_taxonomy.py
index ead506e5..d202bcbf 100755
--- a/bin/merge_taxonomy.py
+++ b/bin/merge_taxonomy.py
@@ -205,7 +205,6 @@ def combgc_taxa(args):
 #########################################
 # FUNCTION: HAMRONIZATION
 #########################################
-# TODO : FIX THE MERGING in ARG pipeline
 def hamronization_taxa(args):
     """_summary_
     Merges hAMRonization tool output with taxonomy information.