From 1d50c06ded0e9148dcb8a200a009a7d9e74d3bd5 Mon Sep 17 00:00:00 2001
From: antsh3k <antshek@hotmail.com>
Date: Tue, 5 Mar 2024 20:06:06 +0000
Subject: [PATCH] Flag typing issue in full_annotation_df

---
 medcat/evaluate_mct_export/mct_analysis.py | 121 ++++++++++-----------
 1 file changed, 59 insertions(+), 62 deletions(-)

diff --git a/medcat/evaluate_mct_export/mct_analysis.py b/medcat/evaluate_mct_export/mct_analysis.py
index 17cf1e7..0f7d87c 100644
--- a/medcat/evaluate_mct_export/mct_analysis.py
+++ b/medcat/evaluate_mct_export/mct_analysis.py
@@ -321,67 +321,7 @@ def _eval(self, metacat_model, mct_export) -> dict:
         result = self._eval_model(metacat_model.model, data, config=metacat_model.config, tokenizer=metacat_model.tokenizer)
 
         return {'predictions': result, 'meta_values': _}
-    
-    def generate_report(self, path: str = 'mct_report.xlsx', meta_ann=False, concept_filter: Optional[List] = None):
-        """
-        :param path: Outfile path
-        :param meta_ann: Include Meta_annotation evaluation in the summary as well
-        :param concept_filter: Filter the report to only display select concepts of interest. List of cuis.
-        :return: A full excel report for MedCATtrainer annotation work done.
-        """
-        if not self.cat:
-            raise ValueError("No model pack specified")
-        if concept_filter:
-            with pd.ExcelWriter(path) as writer:
-                print('Generating report...')
-                # array-like is allowed by documentation but not by typing
-                df = pd.DataFrame.from_dict([self.cat.get_model_card(as_dict=True)]).T.reset_index(drop=False)  # type: ignore
-                df.columns = ['MCT report', f'Generated on {date.today().strftime("%Y/%m/%d")}']  # type: ignore
-                df = pd.concat([df, pd.DataFrame([['MCT Custom filter', concept_filter]], columns=df.columns)],
-                               ignore_index = True)
-                df.to_excel(writer, index=False, sheet_name='medcat_model_card')
-                self.user_stats().to_excel(writer, index=False, sheet_name='user_stats')
-                print('Evaluating annotations...')
-                if meta_ann:
-                    ann_df = self.full_annotation_df()
-                    ann_df = ann_df[ann_df['cui'].isin(concept_filter)].reset_index(drop=True)
-                    ann_df['timestamp'] = ann_df['timestamp'].dt.tz_localize(None)  # Remove timezone information
-                    ann_df.to_excel(writer, index=False, sheet_name='annotations')
-                else:
-                    ann_df = self.annotation_df()
-                    ann_df = ann_df[ann_df['cui'].isin(concept_filter)].reset_index(drop=True)
-                    ann_df['timestamp'] = ann_df['timestamp'].dt.tz_localize(None)  # Remove timezone information
-                    ann_df.to_excel(writer, index=False, sheet_name='annotations')
-                performance_summary_df = self.concept_summary()
-                performance_summary_df = performance_summary_df[performance_summary_df['cui'].isin(concept_filter)]\
-                    .reset_index(drop=True)
-                performance_summary_df.to_excel(writer, index=False, sheet_name='concept_summary')
-                if meta_ann:
-                    print('Evaluating meta_annotations...')
-                    meta_anns_df = self.meta_anns_concept_summary()
-                    meta_anns_df = meta_anns_df[meta_anns_df['cui'].isin(concept_filter)].reset_index(drop=True)
-                    meta_anns_df.to_excel(writer, index=True, sheet_name='meta_annotations_summary')
-        else:
-            with pd.ExcelWriter(path) as writer:
-                print('Generating report...')
-                df = pd.DataFrame.from_dict([self.cat.get_model_card(as_dict=True)]).T.reset_index(drop=False)  # type: ignore
-                df.columns = ['MCT report', f'Generated on {date.today().strftime("%Y/%m/%d")}']  # type: ignore
-                df.to_excel(writer, index=False, sheet_name='medcat_model_card')
-                self.user_stats().to_excel(writer, index=False, sheet_name='user_stats')
-                print('Evaluating annotations...')
-                if meta_ann:
-                    self.full_annotation_df().to_excel(writer, index=False, sheet_name='annotations')
-                else:
-                    self.annotation_df().to_excel(writer, index=False, sheet_name='annotations')
-                self.concept_summary().to_excel(writer, index=False, sheet_name='concept_summary')
-                if meta_ann:
-                    print('Evaluating meta_annotations...')
-                    self.meta_anns_concept_summary().to_excel(writer, index=True, sheet_name='meta_annotations_summary')
 
-        return print(f"MCT report saved to: {path}")
-
-    
-''' TODO: clean uo the insert method with the meta_annotations 
     def full_annotation_df(self) -> pd.DataFrame:
         """
         DataFrame of all annotations created including meta_annotation predictions.
@@ -410,7 +350,7 @@ def full_annotation_df(self) -> pd.DataFrame:
                 else:
                     pred_meta_values.append(_meta_values.get(meta_results['predictions'][counter], np.nan))
                     counter += 1
-            meta_df.insert(int(meta_df.columns.get_loc(meta_model)) + 1, f'predict_{meta_model}', pred_meta_values)
+            meta_df.insert(int(meta_df.columns.get_loc(meta_model)) + 1, f'predict_{meta_model}', pred_meta_values)  # TODO: fix typing - presumably get_loc() is not guaranteed to return an int (confirm)
 
         return meta_df
 
@@ -465,4 +405,47 @@ def meta_anns_concept_summary(self) -> pd.DataFrame:
         meta_anns_df = meta_anns_df.rename_axis('cui').reset_index(drop=False)
         meta_anns_df.insert(1, 'concept_name', meta_anns_df['cui'].map(self.cat.cdb.cui2preferred_name))
         return meta_anns_df
-'''
+
+    def generate_report(self, path: str = 'mct_report.xlsx', meta_ann=False, concept_filter: Optional[List] = None):
+        """
+        Write a full excel report for the MedCATtrainer annotation work done.
+
+        :param path: Outfile path
+        :param meta_ann: Include Meta_annotation evaluation in the summary as well
+        :param concept_filter: Filter the report to only display select concepts of interest. List of cuis.
+        :raises ValueError: If no model pack is specified.
+        :return: None. The excel report is written to ``path`` and a confirmation is printed.
+        """
+        if not self.cat:
+            raise ValueError("No model pack specified")
+
+        def _select(frame: pd.DataFrame) -> pd.DataFrame:
+            # Restrict a frame to the requested cuis; pass it through untouched when no filter is given.
+            if not concept_filter:
+                return frame
+            return frame[frame['cui'].isin(concept_filter)].reset_index(drop=True)
+
+        with pd.ExcelWriter(path) as writer:
+            print('Generating report...')
+            # array-like is allowed by documentation but not by typing
+            df = pd.DataFrame.from_dict([self.cat.get_model_card(as_dict=True)]).T.reset_index(drop=False)  # type: ignore
+            df.columns = ['MCT report', f'Generated on {date.today().strftime("%Y/%m/%d")}']  # type: ignore
+            if concept_filter:
+                # Record the applied filter on the model card sheet.
+                df = pd.concat([df, pd.DataFrame([['MCT Custom filter', concept_filter]], columns=df.columns)],
+                               ignore_index=True)
+            df.to_excel(writer, index=False, sheet_name='medcat_model_card')
+            self.user_stats().to_excel(writer, index=False, sheet_name='user_stats')
+            print('Evaluating annotations...')
+            ann_df = _select(self.full_annotation_df() if meta_ann else self.annotation_df())
+            # Remove timezone information with or without a filter: pandas refuses to write
+            # timezone-aware datetimes to Excel. assign() avoids mutating the source frame.
+            ann_df = ann_df.assign(timestamp=ann_df['timestamp'].dt.tz_localize(None))
+            ann_df.to_excel(writer, index=False, sheet_name='annotations')
+            _select(self.concept_summary()).to_excel(writer, index=False, sheet_name='concept_summary')
+            if meta_ann:
+                print('Evaluating meta_annotations...')
+                meta_anns_df = _select(self.meta_anns_concept_summary())
+                meta_anns_df.to_excel(writer, index=True, sheet_name='meta_annotations_summary')
+
+        print(f"MCT report saved to: {path}")