23 changes: 22 additions & 1 deletion tools/AutoTuner/src/autotuner/distributed.py
@@ -95,6 +95,7 @@
CONSTRAINTS_SDC,
FASTROUTE_TCL,
)
from autotuner.tensorboard_logger import TensorBoardLogger

# Name of the final metric
METRIC = "metric"
@@ -566,6 +567,14 @@ def sweep():
else:
repo_dir = os.path.abspath(os.path.join(ORFS_FLOW_DIR, ".."))
print(f"[INFO TUN-0012] Log folder {LOCAL_DIR}.")

tb_log_dir = os.path.join(LOCAL_DIR, args.experiment)
print(
f"[INFO TUN-0034] TensorBoard logging enabled. Run: tensorboard --logdir={tb_log_dir}"
)

tb_logger = TensorBoardLogger.remote(log_dir=tb_log_dir)

queue = Queue()
parameter_list = list()
for name, content in config_dict.items():
@@ -581,10 +590,22 @@ def sweep():
temp = dict()
for value in parameter:
temp.update(value)
queue.put([args, repo_dir, temp, SDC_ORIGINAL, FR_ORIGINAL, INSTALL_PATH])
queue.put(
[
args,
repo_dir,
temp,
SDC_ORIGINAL,
FR_ORIGINAL,
INSTALL_PATH,
tb_logger,
]
)
workers = [consumer.remote(queue) for _ in range(args.jobs)]
print("[INFO TUN-0009] Waiting for results.")
ray.get(workers)
ray.get(tb_logger.close.remote())
print(f"[INFO TUN-0035] TensorBoard events written to {tb_log_dir}")
print("[INFO TUN-0010] Sweep complete.")


65 changes: 65 additions & 0 deletions tools/AutoTuner/src/autotuner/tensorboard_logger.py
@@ -0,0 +1,65 @@
import logging
import os
from typing import Any, Union

import ray
from tensorboardX import SummaryWriter

logger = logging.getLogger(__name__)


@ray.remote
class TensorBoardLogger:
"""TensorBoard logger for AutoTuner experiments"""

def __init__(self, log_dir: str):
os.makedirs(log_dir, exist_ok=True)
self.writer = SummaryWriter(log_dir=log_dir)
self.log_dir = log_dir
self.step = 0
logger.info(f"TensorBoard logs will be written to {log_dir}")

def log_sweep_metrics(
self,
params: dict[str, Any],
metrics: dict[str, Any],
score: float,
effective_clk_period: Union[float, str],
num_drc: Union[int, str],
die_area: Union[float, str],
) -> None:
"""Log metrics from a single sweep run"""
self.writer.add_scalar("sweep/score", score, self.step)

if effective_clk_period != "-":
self.writer.add_scalar(
"sweep/effective_clk_period", effective_clk_period, self.step
)

if num_drc != "-":
self.writer.add_scalar("sweep/num_drc", num_drc, self.step)

if die_area != "-":
self.writer.add_scalar("sweep/die_area", die_area, self.step)

for key, value in metrics.items():
if isinstance(value, (int, float)):
self.writer.add_scalar(f"metrics/{key}", value, self.step)

# One hparams entry is written per run; the 999999.0 sentinel used by the sweep
# consumer when no effective clock period is available is recorded as 0.0 here.
self.writer.add_hparams(
{
k: v if isinstance(v, (int, float, str, bool)) else str(v)
for k, v in params.items()
},
{"hparam/score": score if score != 999999.0 else 0.0},
)

self.step += 1

def close(self) -> None:
"""Close the TensorBoard writer and log completion message"""
self.writer.close()
logger.info(
f"Sweep complete. View results with: tensorboard --logdir={self.log_dir}"
)
logger.info(f"Total runs logged: {self.step}")
37 changes: 31 additions & 6 deletions tools/AutoTuner/src/autotuner/utils.py
@@ -669,9 +669,34 @@ def openroad_distributed(
@ray.remote
def consumer(queue):
"""consumer"""
while not queue.empty():
next_item = queue.get()
name = next_item[1]
print(f"[INFO TUN-0007] Scheduling run for parameter {name}.")
ray.get(openroad_distributed.remote(*next_item))
print(f"[INFO TUN-0008] Finished run for parameter {name}.")
item = queue.get() if not queue.empty() else None

while item:
args, repo_dir, config, sdc, fr, install, tb_logger = item
print(f"[INFO TUN-0007] Scheduling run for parameter {config}.")
metric_file, _ = ray.get(
openroad_distributed.remote(args, repo_dir, config, sdc, fr, install)
)
print(f"[INFO TUN-0008] Finished run for parameter {config}.")

metrics = read_metrics(metric_file, args.stop_stage)
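# Effective clock period = clk_period - worst_slack; e.g. a 5.0 ns clock with
# -0.3 ns of worst slack yields an effective period of 5.3 ns.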
effective_clk_period = (
metrics["clk_period"] - metrics["worst_slack"]
if metrics["worst_slack"] not in ("ERR", "N/A")
else "-"
)
score = effective_clk_period if effective_clk_period != "-" else 999999.0

ray.get(
tb_logger.log_sweep_metrics.remote(
params=config,
metrics=metrics,
score=score,
effective_clk_period=effective_clk_period,
num_drc=metrics.get("num_drc", "-"),
die_area=metrics.get("die_area", "-"),
)
)

item = queue.get() if not queue.empty() else None