Skip to content

Commit 230bfb4

Browse files
authored
Avoid modifying log level globally (#1944)
* Avoid modifying log level globally
* Address get_job_logs
* Fix integration tests
1 parent: 2a91d83 · commit: 230bfb4

File tree

2 files changed

+17
-12
lines changed

2 files changed

+17
-12
lines changed

sdk/python/kubeflow/training/api/training_client.py

+14 -8
Original file line number | Diff line number | Diff line change
@@ -24,8 +24,7 @@
2424
from kubeflow.training.constants import constants
2525
from kubeflow.training.utils import utils
2626

27-
logging.basicConfig(format="%(message)s")
28-
logging.getLogger().setLevel(logging.INFO)
27+
logger = logging.getLogger(__name__)
2928

3029
status_logger = utils.StatusLogger(
3130
header="{:<30.30} {:<20.20} {}".format("NAME", "STATE", "TIME"),
@@ -222,7 +221,7 @@ def create_job(
222221
f"Failed to create {job_kind}: {namespace}/{job.metadata.name}"
223222
)
224223

225-
logging.info(f"{job_kind} {namespace}/{job.metadata.name} has been created")
224+
logger.debug(f"{job_kind} {namespace}/{job.metadata.name} has been created")
226225

227226
def get_job(
228227
self,
@@ -771,7 +770,7 @@ def get_job_logs(
771770
replica_index: Optional[int] = None,
772771
follow: bool = False,
773772
timeout: int = constants.DEFAULT_TIMEOUT,
774-
):
773+
) -> Dict[str, str]:
775774
"""Print the training logs for the Job. By default it returns logs from
776775
the `master` pod.
777776
@@ -801,6 +800,10 @@ def get_job_logs(
801800
timeout: Optional, Kubernetes API server timeout in seconds
802801
to execute the request.
803802
803+
Returns:
804+
Dict[str, str]: A dictionary in which the keys are pod names and the
805+
values are the corresponding logs.
806+
804807
Raises:
805808
ValueError: Job replica type is invalid.
806809
TimeoutError: Timeout to get Job pods.
@@ -819,6 +822,7 @@ def get_job_logs(
819822
timeout=timeout,
820823
)
821824

825+
logs_dict = {}
822826
if pods and follow:
823827
log_streams = []
824828
for pod in pods:
@@ -849,7 +853,7 @@ def get_job_logs(
849853
if logline is None:
850854
finished[index] = True
851855
break
852-
logging.info("[Pod %s]: %s", pods[index], logline)
856+
print(f"[Pod {pods[index]}]: {logline}")
853857
except queue.Empty:
854858
break
855859
elif pods:
@@ -860,10 +864,12 @@ def get_job_logs(
860864
namespace,
861865
container=constants.JOB_PARAMETERS[job_kind]["container"],
862866
)
863-
logging.info("The logs of pod %s:\n %s", pod, pod_logs)
867+
logs_dict[pod] = pod_logs
864868
except Exception:
865869
raise RuntimeError(f"Failed to read logs for pod {namespace}/{pod}")
866870

871+
return logs_dict
872+
867873
def update_job(
868874
self,
869875
job: constants.JOB_MODELS_TYPE,
@@ -908,7 +914,7 @@ def update_job(
908914
except Exception:
909915
raise RuntimeError(f"Failed to update {job_kind}: {namespace}/{name}")
910916

911-
logging.info(f"{job_kind} {namespace}/{name} has been updated")
917+
logger.debug(f"{job_kind} {namespace}/{name} has been updated")
912918

913919
def delete_job(
914920
self,
@@ -950,4 +956,4 @@ def delete_job(
950956
except Exception:
951957
raise RuntimeError(f"Failed to delete {job_kind}: {namespace}/{name}")
952958

953-
logging.info(f"{job_kind} {namespace}/{name} has been deleted")
959+
logger.debug(f"{job_kind} {namespace}/{name} has been deleted")

sdk/python/kubeflow/training/utils/utils.py

+3 -4
Original file line number | Diff line number | Diff line change
@@ -25,8 +25,7 @@
2525
from kubeflow.training import models
2626

2727

28-
logging.basicConfig(format="%(message)s")
29-
logging.getLogger().setLevel(logging.INFO)
28+
logger = logging.getLogger(__name__)
3029

3130

3231
class StatusLogger:
@@ -39,9 +38,9 @@ def __init__(self, header, column_format):
3938

4039
def __call__(self, *values):
4140
if self.first_call:
42-
logging.info(self.header)
41+
logger.debug(self.header)
4342
self.first_call = False
44-
logging.info(self.column_format.format(*values))
43+
logger.debug(self.column_format.format(*values))
4544

4645

4746
class FakeResponse:

0 commit comments

Comments
 (0)