fix: revert unintended image downgrades
Signed-off-by: Tomas Coufal <[email protected]>
tumido committed Jan 22, 2025
1 parent c923f17 commit 85b212f
Showing 4 changed files with 48 additions and 66 deletions.
2 changes: 1 addition & 1 deletion importer-pipeline.yaml
@@ -38,7 +38,7 @@ deploymentSpec:
value: /tmp
- name: XDG_DATA_HOME
value: /tmp
image: registry.redhat.io/rhelai1/instructlab-nvidia-rhel9@sha256:b3dc9af0244aa6b84e6c3ef53e714a316daaefaae67e28de397cd71ee4b2ac7e
image: registry.redhat.io/rhelai1/instructlab-nvidia-rhel9@sha256:05cfba1fb13ed54b1de4d021da2a31dd78ba7d8cc48e10c7fe372815899a18ae
pipelineInfo:
description: Helper pipeline to the InstructLab pipeline which allows users to seed/import
a new base model
8 changes: 4 additions & 4 deletions pipeline.yaml
@@ -648,7 +648,7 @@ deploymentSpec:
env:
- name: XDG_CACHE_HOME
value: /tmp
image: registry.redhat.io/rhelai1/instructlab-nvidia-rhel9@sha256:b3dc9af0244aa6b84e6c3ef53e714a316daaefaae67e28de397cd71ee4b2ac7e
image: registry.redhat.io/rhelai1/instructlab-nvidia-rhel9@sha256:05cfba1fb13ed54b1de4d021da2a31dd78ba7d8cc48e10c7fe372815899a18ae
exec-deletepvc:
container:
image: argostub/deletepvc
@@ -1371,7 +1371,7 @@ deploymentSpec:
value: /tmp
- name: JUDGE_CA_CERT_PATH
value: /tmp/cert/ca.crt
image: registry.redhat.io/rhelai1/instructlab-nvidia-rhel9@sha256:b3dc9af0244aa6b84e6c3ef53e714a316daaefaae67e28de397cd71ee4b2ac7e
image: registry.redhat.io/rhelai1/instructlab-nvidia-rhel9@sha256:05cfba1fb13ed54b1de4d021da2a31dd78ba7d8cc48e10c7fe372815899a18ae
resources:
accelerator:
count: '1'
@@ -1501,7 +1501,7 @@ deploymentSpec:
value: /tmp
- name: JUDGE_CA_CERT_PATH
value: /tmp/cert/ca.crt
image: registry.redhat.io/rhelai1/instructlab-nvidia-rhel9@sha256:b3dc9af0244aa6b84e6c3ef53e714a316daaefaae67e28de397cd71ee4b2ac7e
image: registry.redhat.io/rhelai1/instructlab-nvidia-rhel9@sha256:05cfba1fb13ed54b1de4d021da2a31dd78ba7d8cc48e10c7fe372815899a18ae
resources:
accelerator:
count: '1'
@@ -1614,7 +1614,7 @@ deploymentSpec:
value: /tmp
- name: SDG_CA_CERT_PATH
value: /tmp/cert/ca.crt
image: registry.redhat.io/rhelai1/instructlab-nvidia-rhel9@sha256:b3dc9af0244aa6b84e6c3ef53e714a316daaefaae67e28de397cd71ee4b2ac7e
image: registry.redhat.io/rhelai1/instructlab-nvidia-rhel9@sha256:05cfba1fb13ed54b1de4d021da2a31dd78ba7d8cc48e10c7fe372815899a18ae
exec-sdg-to-artifact-op:
container:
args:
96 changes: 39 additions & 57 deletions standalone/standalone.py
@@ -1132,20 +1132,27 @@ def sdg_op(
sdg_path: str = "/data/sdg",
sdg_sampling_size: float = 1.0,
):
from os import getenv, path
import os
import shutil
import tempfile
import instructlab.sdg
import openai
import xdg_base_dirs
import yaml
api_key = getenv("api_key")
model = getenv("model")
endpoint = getenv("endpoint")
api_key = os.getenv("api_key")
model = os.getenv("model")
endpoint = os.getenv("endpoint")
if sdg_ca_cert := getenv("SDG_CA_CERT_PATH"):
sdg_ca_cert_path = os.getenv("SDG_CA_CERT_PATH")
use_tls = os.path.exists(sdg_ca_cert_path) and (
os.path.getsize(sdg_ca_cert_path) > 0
)
if use_tls:
import httpx
custom_http_client = httpx.Client(verify=sdg_ca_cert)
custom_http_client = httpx.Client(verify=sdg_ca_cert_path)
client = openai.OpenAI(
base_url=endpoint, api_key=api_key, http_client=custom_http_client
)
@@ -1184,7 +1191,7 @@ def sdg_op(
skills_recipe = "/usr/share/instructlab/sdg/default_data_recipes/skills.yaml"
def set_precomputed_skills_data_ratio(sampling_size: float, skills_recipe: str):
if path.exists(skills_recipe):
if os.path.exists(skills_recipe):
with open(skills_recipe, "r", encoding="utf-8") as file:
skills_yaml = yaml.load(file, Loader=yaml.Loader)
@@ -1200,16 +1207,11 @@ def set_precomputed_skills_data_ratio(sampling_size: float, skills_recipe: str):
except PermissionError:
print("Failed to set precomputed skills data ratio: Permission denied")
print("Attempting to move default data recipes to temporary directory")
import os
import shutil
import tempfile
import xdg_base_dirs
# Create a temporary directory
with tempfile.TemporaryDirectory() as temp_dir:
# Create a default_data_recipes directory
temp_dir = path.join(temp_dir, "default_data_recipes")
temp_dir = os.path.join(temp_dir, "default_data_recipes")
os.mkdir(temp_dir)
# Copy default_data_recipes/skills.yaml to the temporary directory
@@ -1222,7 +1224,7 @@ def set_precomputed_skills_data_ratio(sampling_size: float, skills_recipe: str):
os.path.join(str(dir), "instructlab", "sdg")
for dir in xdg_base_dirs.xdg_data_dirs()
]
temp_pipeline_dir = path.join(temp_dir, "pipeline")
temp_pipeline_dir = os.path.join(temp_dir, "pipeline")
os.mkdir(temp_pipeline_dir)
for d in data_dirs:
pipeline_path = os.path.join(d, "pipelines", pipeline)
@@ -1235,7 +1237,7 @@ def set_precomputed_skills_data_ratio(sampling_size: float, skills_recipe: str):
break
# Build new skills.yaml path
new_skills_recipe = path.join(temp_dir, "skills.yaml")
new_skills_recipe = os.path.join(temp_dir, "skills.yaml")
print(f"New skills recipe path: {new_skills_recipe}")
# Override XDG_DATA_DIRS with the temporary directory
@@ -1661,28 +1663,18 @@ def run_mt_bench_op(
import os
import subprocess
import httpx
import torch
from instructlab.eval.mt_bench import MTBenchEvaluator
if judge_ca_cert := os.getenv("JUDGE_CA_CERT_PATH"):
import httpx
import openai
# Create a custom HTTP client
class CustomHttpClient(httpx.Client):
def __init__(self, *args, **kwargs):
# Use the custom CA certificate
kwargs.setdefault("verify", judge_ca_cert)
super().__init__(*args, **kwargs)
# Create a new OpenAI class that uses the custom HTTP client
class CustomOpenAI(openai.OpenAI):
def __init__(self, *args, **kwargs):
custom_client = CustomHttpClient()
super().__init__(http_client=custom_client, *args, **kwargs)
# Monkey patch the OpenAI class in the openai module, so that the eval lib can use it
openai.OpenAI = CustomOpenAI
judge_api_key = os.getenv("JUDGE_API_KEY", "")
judge_model_name = os.getenv("JUDGE_NAME")
judge_endpoint = os.getenv("JUDGE_ENDPOINT")
judge_ca_cert_path = os.getenv("JUDGE_CA_CERT_PATH")
use_tls = os.path.exists(judge_ca_cert_path) and (
os.path.getsize(judge_ca_cert_path) > 0
)
judge_http_client = httpx.Client(verify=judge_ca_cert_path) if use_tls else None
def launch_vllm(
model_path: str, gpu_count: int, retries: int = 120, delay: int = 10
@@ -1775,10 +1767,6 @@ def shutdown_vllm(process: subprocess.Popen, timeout: int = 20):
models_list = os.listdir(models_folder)
judge_api_key = os.getenv("JUDGE_API_KEY", "")
judge_model_name = os.getenv("JUDGE_NAME")
judge_endpoint = os.getenv("JUDGE_ENDPOINT")
scores = {}
all_mt_bench_data = []
@@ -1814,6 +1802,7 @@ def shutdown_vllm(process: subprocess.Popen, timeout: int = 20):
server_url=vllm_server,
serving_gpus=gpu_count,
max_workers=max_workers,
http_client=judge_http_client,
)
shutdown_vllm(vllm_process)
@@ -1823,6 +1812,7 @@ def shutdown_vllm(process: subprocess.Popen, timeout: int = 20):
api_key=judge_api_key,
serving_gpus=gpu_count,
max_workers=max_workers,
http_client=judge_http_client,
)
mt_bench_data = {
@@ -1884,30 +1874,20 @@ def run_final_eval_op(
import os
import subprocess
import httpx
import torch
from instructlab.eval.mmlu import MMLUBranchEvaluator
from instructlab.eval.mt_bench import MTBenchBranchEvaluator
from instructlab.model.evaluate import qa_pairs_to_qna_to_avg_scores, sort_score
if judge_ca_cert := os.getenv("JUDGE_CA_CERT_PATH"):
import httpx
import openai
# Create a custom HTTP client
class CustomHttpClient(httpx.Client):
def __init__(self, *args, **kwargs):
# Use the custom CA certificate
kwargs.setdefault("verify", judge_ca_cert)
super().__init__(*args, **kwargs)
# Create a new OpenAI class that uses the custom HTTP client
class CustomOpenAI(openai.OpenAI):
def __init__(self, *args, **kwargs):
custom_client = CustomHttpClient()
super().__init__(http_client=custom_client, *args, **kwargs)
# Monkey patch the OpenAI class in the openai module, so that the eval lib can use it
openai.OpenAI = CustomOpenAI
judge_api_key = os.getenv("JUDGE_API_KEY", "")
judge_model_name = os.getenv("JUDGE_NAME")
judge_endpoint = os.getenv("JUDGE_ENDPOINT")
judge_ca_cert_path = os.getenv("JUDGE_CA_CERT_PATH")
use_tls = os.path.exists(judge_ca_cert_path) and (
os.path.getsize(judge_ca_cert_path) > 0
)
judge_http_client = httpx.Client(verify=judge_ca_cert_path) if use_tls else None
print("Starting Final Eval...")
@@ -2267,6 +2247,7 @@ def find_node_dataset_directories(base_dir: str):
server_url=vllm_server,
serving_gpus=gpu_count,
max_workers=max_workers,
http_client=judge_http_client,
)
shutdown_vllm(vllm_process)
@@ -2277,6 +2258,7 @@ def find_node_dataset_directories(base_dir: str):
api_key=judge_api_key,
serving_gpus=gpu_count,
max_workers=max_workers,
http_client=judge_http_client,
)
qa_pairs_and_errors.append((overall_score, qa_pairs, error_rate))
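For reference, the standalone.py changes above drop the earlier monkey-patching of openai.OpenAI and instead build an optional TLS-aware httpx client up front, which is then handed to the evaluator calls in run_mt_bench_op and run_final_eval_op via http_client. A minimal sketch of that pattern, with an illustrative helper name that is not part of this commit:

```python
import os

import httpx
import openai


def make_openai_client(endpoint: str, api_key: str, ca_cert_path: str | None) -> openai.OpenAI:
    # Use a custom CA bundle only when the cert file exists and is non-empty;
    # otherwise fall back to the default httpx transport (http_client=None).
    use_tls = bool(ca_cert_path) and os.path.exists(ca_cert_path) and os.path.getsize(ca_cert_path) > 0
    http_client = httpx.Client(verify=ca_cert_path) if use_tls else None
    return openai.OpenAI(base_url=endpoint, api_key=api_key, http_client=http_client)
```

The same httpx.Client-or-None value is what the diff passes as http_client= alongside server_url, api_key, serving_gpus, and max_workers, so plain-HTTP judges and judges behind a custom CA share a single code path.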
8 changes: 4 additions & 4 deletions utils/consts.py
@@ -1,4 +1,4 @@
PYTHON_IMAGE = "quay.io/modh/odh-generic-data-science-notebook@sha256:0efbb3ad6f8f342360cf1f002d40716a39d4c58f69163e053d5bd19b4fe732d4"
TOOLBOX_IMAGE = "registry.redhat.io/ubi9/toolbox@sha256:da31dee8904a535d12689346e65e5b00d11a6179abf1fa69b548dbd755fa2770"
OC_IMAGE = "registry.redhat.io/openshift4/ose-cli@sha256:1d5c8442a6ec745e6ae44a7738c0681f1e21aac8be76ba826c2ddf2eed8475db"
RHELAI_IMAGE = "registry.redhat.io/rhelai1/instructlab-nvidia-rhel9@sha256:b3dc9af0244aa6b84e6c3ef53e714a316daaefaae67e28de397cd71ee4b2ac7e"
PYTHON_IMAGE = "quay.io/modh/odh-generic-data-science-notebook@sha256:0efbb3ad6f8f342360cf1f002d40716a39d4c58f69163e053d5bd19b4fe732d4" # v3-2024b-20250115
TOOLBOX_IMAGE = "registry.redhat.io/ubi9/toolbox@sha256:da31dee8904a535d12689346e65e5b00d11a6179abf1fa69b548dbd755fa2770" # v9.5
OC_IMAGE = "registry.redhat.io/openshift4/ose-cli@sha256:08bdbfae224dd39c81689ee73c183619d6b41eba7ac04f0dce7ee79f50531d0b" # v4.15.0
RHELAI_IMAGE = "registry.redhat.io/rhelai1/instructlab-nvidia-rhel9@sha256:05cfba1fb13ed54b1de4d021da2a31dd78ba7d8cc48e10c7fe372815899a18ae" # v1.3.2
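These images are pinned by sha256 digest rather than floating tags, so moving to a different build requires an explicit constant change like the one above. As a hypothetical usage sketch (the component and import path are illustrative, not taken from this commit), constants of this kind typically serve as base images for KFP components:

```python
from kfp import dsl

from utils.consts import RHELAI_IMAGE


@dsl.component(base_image=RHELAI_IMAGE)
def instructlab_step(message: str) -> str:
    # Executes inside the pinned instructlab-nvidia-rhel9 image.
    return message
```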
