Skip to content

Commit

Permalink
Remove `__name__` from `logging.getLogger()` calls so that all modules use the root logger
Browse files: browse the repository at this point in the history
Signed-off-by: Khaled Sulayman <[email protected]>
  • Loading branch information
khaledsulayman committed Dec 11, 2024
1 parent 81fad3c commit fe891e0
Show file tree
Hide file tree
Showing 14 changed files with 18 additions and 33 deletions.
2 changes: 1 addition & 1 deletion src/instructlab/sdg/blocks/block.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# Local
from ..registry import BlockRegistry

logger = logging.getLogger(__name__)
logger = logging.getLogger()


# This is part of the public API.
Expand Down
2 changes: 1 addition & 1 deletion src/instructlab/sdg/blocks/filterblock.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from ..registry import BlockRegistry
from .block import Block

logger = logging.getLogger(__name__)
logger = logging.getLogger()


# This is part of the public API.
Expand Down
2 changes: 1 addition & 1 deletion src/instructlab/sdg/blocks/iterblock.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from ..registry import BlockRegistry
from .block import Block

logger = logging.getLogger(__name__)
logger = logging.getLogger()


# This is part of the public API.
Expand Down
2 changes: 1 addition & 1 deletion src/instructlab/sdg/blocks/llmblock.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from ..registry import BlockRegistry, PromptRegistry
from .block import Block, BlockConfigParserError

logger = logging.getLogger(__name__)
logger = logging.getLogger()

DEFAULT_MAX_NUM_TOKENS = 4096

Expand Down
2 changes: 1 addition & 1 deletion src/instructlab/sdg/blocks/utilblocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from ..registry import BlockRegistry
from .block import Block

logger = logging.getLogger(__name__)
logger = logging.getLogger()


# This is part of the public API.
Expand Down
2 changes: 1 addition & 1 deletion src/instructlab/sdg/checkpointing.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# First Party
from instructlab.sdg.utils import pandas

logger = logging.getLogger(__name__)
logger = logging.getLogger()


class Checkpointer:
Expand Down
7 changes: 2 additions & 5 deletions src/instructlab/sdg/datamixing.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
# when |knowledge| << |skills|
MIN_UPSAMPLE_THRESHOLD = 0.03
ALLOWED_COLS = ["id", "messages", "metadata"]
LOGGER = logging.getLogger(__name__)
LOGGER = logging.getLogger()


class DatasetListing(TypedDict):
Expand Down Expand Up @@ -739,10 +739,7 @@ def _gen_mixed_data(self, recipe, output_file_recipe, output_file_data):
self.num_procs,
)

def generate(self, logger=None):
if logger is not None:
global LOGGER # pylint: disable=global-statement
LOGGER = logger
def generate(self):
self._gen_mixed_data(
self.knowledge_recipe,
self.output_file_knowledge_recipe,
Expand Down
2 changes: 1 addition & 1 deletion src/instructlab/sdg/eval_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# First Party
from instructlab.sdg.pipeline import EVAL_PIPELINES_PKG, Pipeline

logger = logging.getLogger(__name__)
logger = logging.getLogger()


def _extract_options(text: str) -> list[Any]:
Expand Down
5 changes: 2 additions & 3 deletions src/instructlab/sdg/generate_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ def generate_data(
document_output_dir = Path(output_dir) / f"documents-{date_suffix}"

leaf_nodes = read_taxonomy_leaf_nodes(
taxonomy, taxonomy_base, yaml_rules, document_output_dir, logger=LOGGER
taxonomy, taxonomy_base, yaml_rules, document_output_dir
)
if not leaf_nodes:
raise GenerateException("Error: No new leaf nodes found in the taxonomy.")
Expand Down Expand Up @@ -406,7 +406,6 @@ def generate_data(
document_output_dir,
model_name,
docling_model_path=docling_model_path,
logger=LOGGER,
)

if not samples:
Expand Down Expand Up @@ -458,7 +457,7 @@ def generate_data(
system_prompt,
)

mixer.generate(logger=LOGGER)
mixer.generate()

generate_duration = time.time() - generate_start
LOGGER.info(f"Generation took {generate_duration:.2f}s")
Expand Down
8 changes: 2 additions & 6 deletions src/instructlab/sdg/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from .blocks.block import Block
from .registry import BlockRegistry

LOGGER = logging.getLogger(__name__)
LOGGER = logging.getLogger()


# This is part of the public API.
Expand Down Expand Up @@ -134,16 +134,12 @@ def from_file(cls, ctx, pipeline_yaml):
pipeline_yaml = os.path.join(resources.files(__package__), pipeline_yaml)
return cls(ctx, pipeline_yaml, *_parse_pipeline_config_file(pipeline_yaml))

def generate(self, dataset, checkpoint_name=None, logger=None) -> Dataset:
def generate(self, dataset, checkpoint_name=None) -> Dataset:
"""
Generate the dataset by running the pipeline steps.
dataset: the input dataset
checkpoint_name: unique subdir name for the checkpoint within checkpoint_dir
"""

if logger is not None:
global LOGGER # pylint: disable=global-statement
LOGGER = logger
# The checkpointer allows us to resume from where we left off
# Saving the output of pipe instances along the way
checkpoint_dir = None
Expand Down
2 changes: 1 addition & 1 deletion src/instructlab/sdg/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# Third Party
from jinja2 import Environment, StrictUndefined, Template

logger = logging.getLogger(__name__)
logger = logging.getLogger()


class BlockRegistry:
Expand Down
2 changes: 1 addition & 1 deletion src/instructlab/sdg/utils/chunkers.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
# First Party
from instructlab.sdg.utils.model_formats import is_model_gguf, is_model_safetensors

logger = logging.getLogger(__name__)
logger = logging.getLogger()
_DEFAULT_CHUNK_OVERLAP = 100


Expand Down
2 changes: 1 addition & 1 deletion src/instructlab/sdg/utils/model_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# Third Party
from gguf.constants import GGUF_MAGIC

logger = logging.getLogger(__name__)
logger = logging.getLogger()


def is_model_safetensors(model_path: pathlib.Path) -> bool:
Expand Down
11 changes: 2 additions & 9 deletions src/instructlab/sdg/utils/taxonomy.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
# Initialize the pdf parser
PDFParser = pdf_parser_v1()

LOGGER = logging.getLogger(__name__)
LOGGER = logging.getLogger()


def _is_taxonomy_file(fn: str) -> bool:
Expand Down Expand Up @@ -372,11 +372,8 @@ def read_taxonomy(


def read_taxonomy_leaf_nodes(
taxonomy, taxonomy_base, yaml_rules, document_output_dir=None, logger=None
taxonomy, taxonomy_base, yaml_rules, document_output_dir=None
):
if logger is not None:
global LOGGER # pylint: disable=global-statement
LOGGER = logger
seed_instruction_data = read_taxonomy(
taxonomy, taxonomy_base, yaml_rules, document_output_dir
)
Expand Down Expand Up @@ -466,11 +463,7 @@ def leaf_node_to_samples(
document_output_dir,
model_name,
docling_model_path=None,
logger=None,
):
if logger is not None:
global LOGGER # pylint: disable=global-statement
LOGGER = logger
if not leaf_node:
return []
if leaf_node[0].get("documents"):
Expand Down

0 comments on commit fe891e0

Please sign in to comment.