format

michaelfeil · michaelfeil · commit 87ddc7c34406 · 2024-08-14T19:17:38.000-06:00
diff --git a/.gitmodules b/.gitmodules
@@ -1,3 +1,6 @@
 [submodule "runpod-worker"]
 	path = infra/runpod
 	url = https://github.com/runpod-workers/worker-infinity-embedding
+[submodule "infra/sap/sap-core-ai"]
+	path = infra/sap/sap-core-ai
+	url = https://github.com/SAP-samples/btp-generative-ai-hub-use-cases
diff --git a/libs/infinity_emb/infinity_emb/infinity_server.py b/libs/infinity_emb/infinity_emb/infinity_server.py
@@ -497,15 +497,16 @@ def _construct(name: str):
             # gets the default value from the ENV Manager
             default=getattr(MANAGER, name),
             # envvar is a dummy that is there for documentation purposes.
-            envvar=f'`{MANAGER.to_name(name)}`',
+            envvar=f"`{MANAGER.to_name(name)}`",
         )
 
     @tp.command("v2")
     def v2(
         # t
         # arguments for engine
         model_id: list[str] = typer.Option(
-            **_construct("model_id"), help="Huggingface model repo id. Subset of possible models: https://huggingface.co/models?other=text-embeddings-inference&"
+            **_construct("model_id"),
+            help="Huggingface model repo id. Subset of possible models: https://huggingface.co/models?other=text-embeddings-inference&",
         ),
         served_model_name: list[str] = typer.Option(
             **_construct("served_model_name"),
@@ -595,7 +596,7 @@ def v2(
         \n
         Multi-Model CLI Playbook: \n
         - 1. specific options can be combinedmodels. i.e. `v2 --model-id model/no1 --model-id/no2 --batch-size 8 --batch-size 4` \n
-        - 2. this is identical to setting ENV Variables to: INFINITY_MODEL_ID="model/no1;model/no2;" && INFINITY_BATCH_SIZE="8;4;" \n 
+        - 2. this is identical to setting ENV Variables to: INFINITY_MODEL_ID="model/no1;model/no2;" && INFINITY_BATCH_SIZE="8;4;" \n
         - 3. single items are broadcasted to model-id length!
         """
         # old
diff --git a/libs/infinity_emb/infinity_emb/transformer/acceleration.py b/libs/infinity_emb/infinity_emb/transformer/acceleration.py
@@ -34,13 +34,13 @@ def to_bettertransformer(
         )
         return model
 
-    if os.environ.get("INFINITY_DISABLE_OPTIMUM", False):  
+    if os.environ.get("INFINITY_DISABLE_OPTIMUM", False):
         # TODO: remove this code path, it just prints this warning
         logger.error(
             "DEPRECATED the `INFINITY_DISABLE_OPTIMUM` - setting optimizations via BetterTransformer,"
             "INFINITY_DISABLE_OPTIMUM is no longer supported, please use the CLI / ENV for that."
         )
-        
+
     if (
         hasattr(model.config, "_attn_implementation")
         and model.config._attn_implementation != "eager"
diff --git a/libs/infinity_emb/infinity_emb/transformer/crossencoder/optimum.py b/libs/infinity_emb/infinity_emb/transformer/crossencoder/optimum.py
@@ -41,7 +41,9 @@ def __init__(self, *, engine_args: EngineArgs):
             engine_args.model_name_or_path,
             execution_provider=provider,
             file_name=onnx_file.as_posix(),
-            optimize_model=not os.environ.get("INFINITY_ONNX_DISABLE_OPTIMIZE", False), # TODO: make this env variable public
+            optimize_model=not os.environ.get(
+                "INFINITY_ONNX_DISABLE_OPTIMIZE", False
+            ),  # TODO: make this env variable public
             model_class=ORTModelForSequenceClassification,
             revision=engine_args.revision,
             trust_remote_code=engine_args.trust_remote_code,
diff --git a/libs/infinity_emb/infinity_emb/transformer/embedder/optimum.py b/libs/infinity_emb/infinity_emb/transformer/embedder/optimum.py
@@ -57,7 +57,9 @@ def __init__(self, *, engine_args: EngineArgs):
             trust_remote_code=engine_args.trust_remote_code,
             execution_provider=provider,
             file_name=onnx_file.as_posix(),
-            optimize_model=not os.environ.get("INFINITY_ONNX_DISABLE_OPTIMIZE", False), # TODO: make this env variable public
+            optimize_model=not os.environ.get(
+                "INFINITY_ONNX_DISABLE_OPTIMIZE", False
+            ),  # TODO: make this env variable public
             model_class=ORTModelForFeatureExtraction,
         )
         self.model.use_io_binding = False