Merge pull request #304 from whylabs/zip-rm
Zip rm
naddeoa authored May 10, 2024
2 parents 45ece8e + d851e0a commit fa8d5f6
Showing 11 changed files with 291 additions and 290 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.0.28.dev15
+current_version = 0.0.28.dev16
 tag = False
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<build>\d+))?
 serialize =
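For reference, bumpversion's `parse` regex splits the new version string `0.0.28.dev16` into major/minor/patch components plus the optional release/build suffix. A quick standalone check of that pattern in Python, for illustration only:

import re

PARSE = r"(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<build>\d+))?"
m = re.fullmatch(PARSE, "0.0.28.dev16")
assert m is not None
# prints: 0 0 28 dev 16
print(m.group("major"), m.group("minor"), m.group("patch"), m.group("release"), m.group("build"))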
45 changes: 44 additions & 1 deletion langkit/asset_downloader.py
@@ -4,7 +4,7 @@
 import zipfile
 from dataclasses import dataclass
 from functools import lru_cache
-from typing import cast
+from typing import List, Optional, cast
 
 import requests
 import whylabs_client
@@ -46,12 +46,27 @@ def _get_asset_path(asset_id: str, tag: str = "0") -> AssetPath:
 
 def _is_extracted(asset_id: str, tag: str = "0") -> bool:
     asset_path = _get_asset_path(asset_id, tag)
 
+    # If we can see the metadata file, we assume the asset is extracted
+    metadata_file_content = _read_asset_metadata(asset_id, tag)
+    if metadata_file_content is not None:
+        logger.info(f"Asset {asset_id} with tag {tag} already extracted")
+        # check that each file in the metadata file exists
+        for file_name in metadata_file_content:
+            if not os.path.exists(f"{asset_path.extract_path}/{file_name}"):
+                logger.info(f"Asset {asset_id} with tag {tag} not extracted, file {file_name} missing but expected")
+                return False
+        return True
+
+    if not os.path.exists(asset_path.zip_path):
+        logger.info(f"Asset {asset_id} with tag {tag} not downloaded, zip file not found")
+        return False
+
     # If the zip file is still here then check if it's been extracted
     with zipfile.ZipFile(asset_path.zip_path, "r") as zip_ref:
         zip_names = set(zip_ref.namelist())
         extract_names = set(os.listdir(asset_path.extract_path))
 
     return zip_names.issubset(extract_names)
 
 
@@ -61,6 +76,25 @@ def _extract_asset(asset_id: str, tag: str = "0"):
         zip_ref.extractall(asset_path.extract_path)
 
 
+def _generate_asset_metadata(asset_id: str, tag: str = "0"):
+    """
+    Create a metadata file with a list of all of the expected files in the asset zip
+    """
+    asset_path = _get_asset_path(asset_id, tag)
+    with zipfile.ZipFile(asset_path.zip_path, "r") as zip_ref:
+        with open(f"{asset_path.extract_path}/metadata.txt", "w") as f:
+            f.write("\n".join(zip_ref.namelist()))
+
+
+def _read_asset_metadata(asset_id: str, tag: str = "0") -> Optional[List[str]]:
+    asset_path = _get_asset_path(asset_id, tag)
+    if not os.path.exists(f"{asset_path.extract_path}/metadata.txt"):
+        return None
+
+    with open(f"{asset_path.extract_path}/metadata.txt", "r") as f:
+        return f.read().split("\n")
+
+
 def _is_zip_file(file_path: str) -> bool:
     try:
         with zipfile.ZipFile(file_path, "r"):
@@ -69,6 +103,11 @@ def _is_zip_file(file_path: str) -> bool:
         return False
 
 
+def _remove_zip_file(asset_id: str, tag: str = "0"):
+    asset_path = _get_asset_path(asset_id, tag)
+    os.remove(asset_path.zip_path)
+
+
 @retry(stop=stop_after_attempt(3), wait=wait_exponential_jitter(max=5))
 def _download_asset(asset_id: str, tag: str = "0"):
     asset_path = _get_asset_path(asset_id, tag)
@@ -99,5 +138,9 @@ def get_asset(asset_id: str, tag: str = "0"):
     _download_asset(asset_id, tag)
 
     logger.info(f"Extracting asset {asset_id} with tag {tag}")
+
     _extract_asset(asset_id, tag)
+    _generate_asset_metadata(asset_id, tag)
+    _remove_zip_file(asset_id, tag)
 
     return asset_path.extract_path
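Net effect of the asset_downloader.py changes: `get_asset` now extracts the downloaded zip, writes the archive's file list to `metadata.txt` in the extract directory, and then deletes the zip, so `_is_extracted` can verify a cached asset from the metadata file alone. A minimal standalone sketch of that lifecycle (hypothetical function and paths, not the module's actual API):

import os
import zipfile

def fetch_once(zip_path: str, extract_path: str) -> str:
    """Extract zip_path once, record the expected files, then drop the archive."""
    metadata = os.path.join(extract_path, "metadata.txt")
    if os.path.exists(metadata):
        # A previous run finished: verify every expected file still exists.
        with open(metadata) as f:
            expected = f.read().split("\n")
        if all(os.path.exists(os.path.join(extract_path, name)) for name in expected):
            return extract_path  # cached; note the zip is already gone
    with zipfile.ZipFile(zip_path) as zf:
        zf.extractall(extract_path)
        names = zf.namelist()
    with open(metadata, "w") as f:
        f.write("\n".join(names))  # write the manifest before removing the zip
    os.remove(zip_path)
    return extract_path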
2 changes: 2 additions & 0 deletions langkit/core/metric.py
@@ -110,6 +110,7 @@ class SingleMetric:
     cache_assets: Optional[Callable[[], None]] = None
     # Maybe expose the generic on Metrics in the future, not urgent
     context_dependencies: Optional[List[ContextDependency[Any]]] = None
+    metadata: Optional[Dict[str, Any]] = None
 
 
 MultiEvaluateWithContext = Callable[[pd.DataFrame, Context], MultiMetricResult]
@@ -126,6 +127,7 @@ class MultiMetric:
     init: Optional[Callable[[], None]] = None
     cache_assets: Optional[Callable[[], None]] = None
     context_dependencies: Optional[List[ContextDependency[Any]]] = None
+    metadata: Optional[Dict[str, Any]] = None
 
 
 def invoke_evaluate_multi(
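Both `SingleMetric` and `MultiMetric` gain an optional free-form `metadata` dict. A standalone sketch of the shape of the new field (a stand-in dataclass, not the real classes, since their remaining fields are not shown in this diff):

from dataclasses import dataclass
from typing import Any, Dict, Optional

@dataclass(frozen=True)
class MetricStub:
    # Stand-in for SingleMetric/MultiMetric; only the new field is faithful.
    name: str
    metadata: Optional[Dict[str, Any]] = None

m = MetricStub(name="response.toxicity", metadata={"hf_model": "example/model"})
print(m.metadata)  # {'hf_model': 'example/model'}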
13 changes: 12 additions & 1 deletion langkit/core/workflow.py
@@ -3,7 +3,7 @@
 import time
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from typing import Dict, List, Mapping, Optional, Set, Tuple, Union, cast, overload
+from typing import Any, Dict, List, Mapping, Optional, Set, Tuple, Union, cast, overload
 
 import pandas as pd
 from typing_extensions import NotRequired, TypedDict
@@ -192,6 +192,17 @@ def get_metric_names(self) -> List[str]:
             names.extend(metric.names)
         return names
 
+    def get_metric_metadata(self) -> Dict[str, Dict[str, Any]]:
+        metadata: Dict[str, Dict[str, Any]] = {}
+        for metric in self.metrics_config.metrics:
+            if metric.metadata:
+                if isinstance(metric, SingleMetric):
+                    metadata[metric.name] = metric.metadata
+                else:
+                    for name in metric.names:
+                        metadata[name] = metric.metadata
+        return metadata
+
     @overload
     def run(self, data: pd.DataFrame, options: Optional[RunOptions] = None) -> WorkflowResult:
         """
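`get_metric_metadata` flattens per-metric metadata into a single dict keyed by metric name; a `MultiMetric`'s metadata is repeated under each name it emits. A standalone sketch of that flattening rule, decoupled from the Workflow class for illustration:

from typing import Any, Dict, List

def flatten_metadata(names_by_metric: List[List[str]], metadata_by_metric: List[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
    # Each metric contributes its metadata once per output name it produces.
    out: Dict[str, Dict[str, Any]] = {}
    for names, meta in zip(names_by_metric, metadata_by_metric):
        for name in names:
            out[name] = meta
    return out

print(flatten_metadata([["a.score"], ["b.min", "b.max"]], [{"v": 1}, {"v": 2}]))
# {'a.score': {'v': 1}, 'b.min': {'v': 2}, 'b.max': {'v': 2}}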
66 changes: 33 additions & 33 deletions langkit/metrics/library.py
@@ -302,31 +302,30 @@ def sentiment_score() -> MetricCreator:
             return prompt_sentiment_polarity
 
         class topics:
-            def __init__(self, topics: List[str], hypothesis_template: Optional[str] = None, onnx: bool = True):
+            def __init__(
+                self,
+                topics: List[str],
+                hypothesis_template: Optional[str] = None,
+                onnx: bool = True,
+                hf_model: Optional[str] = None,
+                hf_model_revision: Optional[str] = None,
+            ):
                 self.topics = topics
                 self.hypothesis_template = hypothesis_template
                 self.onnx = onnx
+                self.hf_model = hf_model
+                self.hf_model_revision = hf_model_revision
 
             def __call__(self) -> MetricCreator:
-                if self.onnx:
-                    from langkit.metrics.topic_onnx import topic_metric
+                from langkit.metrics.topic import topic_metric
 
-                    return partial(topic_metric, "prompt", self.topics, self.hypothesis_template)
-                else:
-                    from langkit.metrics.topic import topic_metric
-
-                    return partial(topic_metric, "prompt", self.topics, self.hypothesis_template)
+                return partial(topic_metric, "prompt", self.topics, self.hypothesis_template, use_onnx=self.onnx)
 
             @staticmethod
            def medicine(onnx: bool = True) -> MetricCreator:
-                if onnx:
-                    from langkit.metrics.topic_onnx import topic_metric
+                from langkit.metrics.topic import topic_metric
 
-                    return lambda: topic_metric("prompt", ["medicine"])
-                else:
-                    from langkit.metrics.topic_onnx import topic_metric
-
-                    return lambda: topic_metric("prompt", ["medicine"])
+                return lambda: topic_metric("prompt", ["medicine"], use_onnx=onnx)
 
     class response:
         @staticmethod
@@ -352,19 +351,21 @@ def __call__(self) -> MetricCreator:
                 return self.toxicity_score()
 
             @staticmethod
-            def toxicity_score(onnx: bool = True) -> MetricCreator:
+            def toxicity_score(
+                onnx: bool = True, onnx_tag: Optional[str] = None, hf_model: Optional[str] = None, hf_model_revision: Optional[str] = None
+            ) -> MetricCreator:
                 """
                 Analyze the toxicity of the response. The output of this metric ranges from 0 to 1, where 0
                 indicates a non-toxic response and 1 indicates a toxic response.
                 """
                 if onnx:
                     from langkit.metrics.toxicity_onnx import response_toxicity_metric
 
-                    return response_toxicity_metric
+                    return partial(response_toxicity_metric, tag=onnx_tag)
                 else:
                     from langkit.metrics.toxicity import response_toxicity_metric
 
-                    return response_toxicity_metric
+                    return partial(response_toxicity_metric, hf_model=hf_model, hf_model_revision=hf_model_revision)
 
         class stats:
             def __call__(self) -> MetricCreator:
@@ -534,28 +535,27 @@ def context(embedding: EmbeddingChoiceArg = "default") -> MetricCreator:
             return partial(input_context_similarity, embedding=embedding, input_column_name="response")
 
         class topics:
-            def __init__(self, topics: List[str], hypothesis_template: Optional[str] = None, onnx: bool = True):
+            def __init__(
+                self,
+                topics: List[str],
+                hypothesis_template: Optional[str] = None,
+                onnx: bool = True,
+                hf_model: Optional[str] = None,
+                hf_model_revision: Optional[str] = None,
+            ):
                 self.topics = topics
                 self.hypothesis_template = hypothesis_template
                 self.onnx = onnx
+                self.hf_model = hf_model
+                self.hf_model_revision = hf_model_revision
 
             def __call__(self) -> MetricCreator:
-                if self.onnx:
-                    from langkit.metrics.topic_onnx import topic_metric
-
-                    return partial(topic_metric, "response", self.topics, self.hypothesis_template)
-                else:
-                    from langkit.metrics.topic import topic_metric
+                from langkit.metrics.topic import topic_metric
 
-                    return partial(topic_metric, "response", self.topics, self.hypothesis_template)
+                return partial(topic_metric, "response", self.topics, self.hypothesis_template, use_onnx=self.onnx)
 
             @staticmethod
            def medicine(onnx: bool = True) -> MetricCreator:
-                if onnx:
-                    from langkit.metrics.topic_onnx import topic_metric
-
-                    return partial(topic_metric, "response", ["medicine"])
-                else:
-                    from langkit.metrics.topic_onnx import topic_metric
+                from langkit.metrics.topic import topic_metric
 
-                    return partial(topic_metric, "response", ["medicine"])
+                return partial(topic_metric, "response", ["medicine"], use_onnx=onnx)
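With these changes, ONNX vs. Hugging Face execution for topic metrics is selected through a single `topic_metric` entry point via `use_onnx`, and `toxicity_score` accepts an ONNX asset tag or an explicit Hugging Face model and revision. A hedged usage sketch (the `lib` facade is assumed from the wider codebase, and how the topics classes consume `hf_model`/`hf_model_revision` is not shown in these hunks):

from langkit.metrics.library import lib

# Prompt topics, forcing the non-ONNX path; the model id and revision are hypothetical.
topics = lib.prompt.topics(
    ["medicine", "finance"],
    onnx=False,
    hf_model="example-org/zero-shot-model",
    hf_model_revision="main",
)

# Response toxicity pinned to a specific ONNX asset tag.
toxicity = lib.response.toxicity.toxicity_score(onnx=True, onnx_tag="0")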