From e1c693c9779b887a0889830d6bf5d5b98ba48f86 Mon Sep 17 00:00:00 2001 From: Jan Cuhel Date: Sat, 16 Mar 2024 22:22:30 +0100 Subject: [PATCH 01/10] Add annotator size && IOU threshold --- .../dataset_annotation/clip_annotator.py | 35 +++++++++++++++---- .../dataset_annotation/owlv2_annotator.py | 17 ++++++++- .../generate_dataset_from_scratch.py | 20 +++++++++-- 3 files changed, 63 insertions(+), 9 deletions(-) diff --git a/datadreamer/dataset_annotation/clip_annotator.py b/datadreamer/dataset_annotation/clip_annotator.py index facc1d1..e505d5e 100644 --- a/datadreamer/dataset_annotation/clip_annotator.py +++ b/datadreamer/dataset_annotation/clip_annotator.py @@ -8,7 +8,7 @@ from PIL import Image from transformers import CLIPModel, CLIPProcessor -from datadreamer.dataset_annotation.image_annotator import BaseAnnotator +from datadreamer.dataset_annotation.image_annotator import BaseAnnotator, TaskList class CLIPAnnotator(BaseAnnotator): @@ -19,8 +19,11 @@ class CLIPAnnotator(BaseAnnotator): clip (CLIPModel): The CLIP model for image-text similarity evaluation. clip_processor (CLIPProcessor): The processor for preparing inputs to the CLIP model. device (str): The device on which the model will run ('cuda' for GPU, 'cpu' for CPU). + size (str): The size of the CLIP model to use ('base' or 'large'). Methods: + _init_processor(): Initializes the CLIP processor. + _init_model(): Initializes the CLIP model. annotate_batch(image, prompts, conf_threshold, use_tta, synonym_dict): Annotates the given image with bounding boxes and labels. release(empty_cuda_cache): Releases resources and optionally empties the CUDA cache. """ @@ -29,6 +32,7 @@ def __init__( self, seed: float = 42, device: str = "cuda", + size: str = "base", ) -> None: """Initializes the CLIPAnnotator with a specific seed and device. @@ -36,14 +40,33 @@ def __init__( seed (float): Seed for reproducibility. Defaults to 42. device (str): The device to run the model on. Defaults to 'cuda'. 
""" - super().__init__(seed) - self.clip = CLIPModel.from_pretrained("openai/clip-vit-base-patch32") - self.clip_processor = CLIPProcessor.from_pretrained( - "openai/clip-vit-base-patch32" - ) + super().__init__(seed, task_definition=TaskList.CLASSIFICATION) + self.size = size + self.clip = self._init_model() + self.clip_processor = self._init_processor() self.device = device self.clip.to(self.device) + def _init_processor(self): + """Initializes the CLIP processor. + + Returns: + CLIPProcessor: The initialized CLIP processor. + """ + if self.size == "large": + return CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14") + return CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32") + + def _init_model(self): + """Initializes the CLIP model. + + Returns: + CLIPModel: The initialized CLIP model. + """ + if self.size == "large": + return CLIPModel.from_pretrained("openai/clip-vit-large-patch14") + return CLIPModel.from_pretrained("openai/clip-vit-base-patch32") + def annotate_batch( self, images: List[PIL.Image.Image], diff --git a/datadreamer/dataset_annotation/owlv2_annotator.py b/datadreamer/dataset_annotation/owlv2_annotator.py index 2a84875..6cbf183 100644 --- a/datadreamer/dataset_annotation/owlv2_annotator.py +++ b/datadreamer/dataset_annotation/owlv2_annotator.py @@ -20,6 +20,7 @@ class OWLv2Annotator(BaseAnnotator): model (Owlv2ForObjectDetection): The OWLv2 model for object detection. processor (Owlv2Processor): The processor for the OWLv2 model. device (str): The device on which the model will run ('cuda' for GPU, 'cpu' for CPU). + size (str): The size of the OWLv2 model to use ('base' or 'large'). Methods: _init_model(): Initializes the OWLv2 model. @@ -32,6 +33,7 @@ def __init__( self, seed: float = 42, device: str = "cuda", + size: str = "base", ) -> None: """Initializes the OWLv2Annotator with a specific seed and device. @@ -40,6 +42,7 @@ def __init__( device (str): The device to run the model on. Defaults to 'cuda'. 
""" super().__init__(seed) + self.size = size self.model = self._init_model() self.processor = self._init_processor() self.device = device @@ -51,6 +54,10 @@ def _init_model(self): Returns: Owlv2ForObjectDetection: The initialized OWLv2 model. """ + if self.size == "large": + return Owlv2ForObjectDetection.from_pretrained( + "google/owlv2-large-patch14-ensemble" + ) return Owlv2ForObjectDetection.from_pretrained( "google/owlv2-base-patch16-ensemble" ) @@ -61,6 +68,10 @@ def _init_processor(self): Returns: Owlv2Processor: The initialized processor. """ + if self.size == "large": + return Owlv2Processor.from_pretrained( + "google/owlv2-large-patch14-ensemble", do_pad=False, do_resize=False + ) return Owlv2Processor.from_pretrained( "google/owlv2-base-patch16-ensemble", do_pad=False, do_resize=False ) @@ -145,6 +156,7 @@ def annotate_batch( images: List[PIL.Image.Image], prompts: List[str], conf_threshold: float = 0.1, + iou_threshold: float = 0.2, use_tta: bool = False, synonym_dict: dict[str, List[str]] | None = None, ) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray]]: @@ -154,6 +166,7 @@ def annotate_batch( images: The images to be annotated. prompts: Prompts to guide the annotation. conf_threshold (float, optional): Confidence threshold for the annotations. Defaults to 0.1. + iou_threshold (float, optional): Intersection over union threshold for non-maximum suppression. Defaults to 0.2. use_tta (bool, optional): Flag to apply test-time augmentation. Defaults to False. synonym_dict (dict, optional): Dictionary for handling synonyms in labels. Defaults to None. @@ -233,7 +246,9 @@ def annotate_batch( # output is a list of detections, each item is one tensor with shape (num_boxes, 6), 6 is for [xyxy, conf, cls]. 
output = non_max_suppression( - all_boxes_cat.unsqueeze(0), conf_thres=conf_threshold, iou_thres=0.2 + all_boxes_cat.unsqueeze(0), + conf_thres=conf_threshold, + iou_thres=iou_threshold, ) output_boxes = output[0][:, :4] diff --git a/datadreamer/pipelines/generate_dataset_from_scratch.py b/datadreamer/pipelines/generate_dataset_from_scratch.py index 7fcb87c..911f6bd 100644 --- a/datadreamer/pipelines/generate_dataset_from_scratch.py +++ b/datadreamer/pipelines/generate_dataset_from_scratch.py @@ -127,6 +127,13 @@ def parse_args(): help="Confidence threshold for annotation", ) + parser.add_argument( + "--annotation_iou_threshold", + type=float, + default=0.2, + help="Intersection over Union (IoU) threshold for annotation", + ) + parser.add_argument( "--use_tta", default=False, @@ -156,6 +163,14 @@ def parse_args(): help="Quantization to use for Mistral language model", ) + parser.add_argument( + "--annotator_size", + type=str, + default="base", + choices=["base", "large"], + help="Size of the annotator model to use", + ) + parser.add_argument( "--batch_size_prompt", type=int, @@ -402,7 +417,7 @@ def main(): if args.task == "classification": # Classification annotation annotator_class = clf_annotators[args.image_annotator] - annotator = annotator_class(device=args.device) + annotator = annotator_class(device=args.device, size=args.annotator_size) labels_list = [] # Split image_paths into batches @@ -431,7 +446,7 @@ def main(): else: # Annotation annotator_class = det_annotators[args.image_annotator] - annotator = annotator_class(device=args.device) + annotator = annotator_class(device=args.device, size=args.annotator_size) boxes_list = [] scores_list = [] @@ -453,6 +468,7 @@ def main(): images, args.class_names, conf_threshold=args.conf_threshold, + iou_threshold=args.annotation_iou_threshold, use_tta=args.use_tta, synonym_dict=synonym_dict, ) From 4fba2dceccb513a8eb9082965ea70b536440bd82 Mon Sep 17 00:00:00 2001 From: Jan Cuhel Date: Tue, 19 Mar 2024 12:03:33 +0100 
Subject: [PATCH 02/10] Add negative prompts, prompt prefix and suffix args & unittests for new args --- .../generate_dataset_from_scratch.py | 77 +++++++++++++++++++ tests/integration/test_pipeline.py | 30 ++++++++ tests/unittests/test_annotators.py | 30 ++++++-- 3 files changed, 130 insertions(+), 7 deletions(-) diff --git a/datadreamer/pipelines/generate_dataset_from_scratch.py b/datadreamer/pipelines/generate_dataset_from_scratch.py index 911f6bd..6dfb823 100644 --- a/datadreamer/pipelines/generate_dataset_from_scratch.py +++ b/datadreamer/pipelines/generate_dataset_from_scratch.py @@ -120,6 +120,64 @@ def parse_args(): help="Image annotator to use", ) + parser.add_argument( + "--negative_prompt", + type=str, + nargs="+", + default=[ + "cartoon", + "blue skin", + "painting", + "scrispture", + "golden", + "illustration", + "worst quality", + "low quality", + "normal quality:2", + "unrealistic dream", + "low resolution", + "static", + "sd character", + "low quality", + "low resolution", + "greyscale", + "monochrome", + "nose", + "cropped", + "lowres", + "jpeg artifacts", + "deformed iris", + "deformed pupils", + "bad eyes", + "semi-realistic worst quality", + "bad lips", + "deformed mouth", + "deformed face", + "deformed fingers", + "bad anatomy", + ], + help="List of of negative prompts to guide the generation away from certain features", + ) + + parser.add_argument( + "--prompt_suffix", + type=str, + nargs="+", + default=[ + "hd", + "8k", + "highly detailed", + ], + help="Suffix to add to every image generation prompt, e.g., for adding details like resolution", + ) + + parser.add_argument( + "--prompt_prefix", + type=str, + default="", + help="Prefix to add to every image generation prompt", + ) + parser.add_argument( "--conf_threshold", type=float, @@ -248,6 +306,10 @@ def check_args(args): if not 0 <= args.conf_threshold <= 1: raise ValueError("--conf_threshold must be between 0 and 1") + # Check annotation_iou_threshold + if not 0 <= 
args.annotation_iou_threshold <= 1: + raise ValueError("--annotation_iou_threshold must be between 0 and 1") + # Check image_tester_patience if args.image_tester_patience < 0: raise ValueError("--image_tester_patience must be a non-negative integer") @@ -257,6 +319,18 @@ def check_args(args): if not torch.cuda.is_available(): raise ValueError("CUDA is not available. Please use --device cpu") + # Check negative_prompt + if not args.negative_prompt or any( + not isinstance(name, str) for name in args.negative_prompt + ): + raise ValueError("--negative_prompt must be a non-empty list of strings") + + # Check prompt_suffix + if not args.prompt_suffix or any( + not isinstance(name, str) for name in args.prompt_suffix + ): + raise ValueError("--prompt_suffix must be a non-empty list of strings") + # Check for LM quantization availability if args.lm_quantization != "none" and ( args.device == "cpu" @@ -374,6 +448,9 @@ def main(): # Image generation image_generator_class = image_generators[args.image_generator] image_generator = image_generator_class( + prompt_prefix=args.prompt_prefix, + prompt_suffix=", " + ", ".join(args.prompt_suffix), + negative_prompt=", ".join(args.negative_prompt), seed=args.seed, use_clip_image_tester=args.use_image_tester, image_tester_patience=args.image_tester_patience, diff --git a/tests/integration/test_pipeline.py b/tests/integration/test_pipeline.py index 5fbe926..ed8ad02 100644 --- a/tests/integration/test_pipeline.py +++ b/tests/integration/test_pipeline.py @@ -122,12 +122,30 @@ def test_invalid_device(): _check_wrong_argument_choice(cmd) +def test_invalid_annotator_size(): + # Define the cmd + cmd = "datadreamer --annotator_size invalide_value" + _check_wrong_argument_choice(cmd) + + def test_empty_class_names(): # Define the cmd cmd = "datadreamer --class_names []" _check_wrong_value(cmd) +def test_empty_negative_prompt(): + # Define the cmd + cmd = "datadreamer --negative_prompt []" + _check_wrong_value(cmd) + + +def 
test_empty_prompt_suffix(): + # Define the cmd + cmd = "datadreamer --prompt_suffix []" + _check_wrong_value(cmd) + + def test_invalid_class_names(): # Define the cmd cmd = "datadreamer --class_names [2, -1]" @@ -152,6 +170,18 @@ def test_big_conf_threshold(): _check_wrong_value(cmd) +def test_negative_annotation_iou_threshold(): + # Define the cmd + cmd = "datadreamer --annotation_iou_threshold -1" + _check_wrong_value(cmd) + + +def test_big_annotation_iou_threshold(): + # Define the cmd + cmd = "datadreamer --annotation_iou_threshold 10" + _check_wrong_value(cmd) + + def test_invalid_image_tester_patience(): # Define the cmd cmd = "datadreamer --image_tester_patience -1" diff --git a/tests/unittests/test_annotators.py b/tests/unittests/test_annotators.py index 9f66867..698ed3d 100644 --- a/tests/unittests/test_annotators.py +++ b/tests/unittests/test_annotators.py @@ -14,10 +14,10 @@ total_disk_space = psutil.disk_usage("/").total / (1024**3) -def _check_owlv2_annotator(device: str): +def _check_owlv2_annotator(device: str, size: str = "base"): url = "https://ultralytics.com/images/bus.jpg" im = Image.open(requests.get(url, stream=True).raw) - annotator = OWLv2Annotator(device=device) + annotator = OWLv2Annotator(device=device, size=size) final_boxes, final_scores, final_labels = annotator.annotate_batch( [im], ["bus", "people"] ) @@ -51,14 +51,14 @@ def test_cuda_owlv2_annotator(): total_disk_space < 15, reason="Test requires at least 15GB of HDD", ) -def test_cou_owlv2_annotator(): +def test_cpu_owlv2_annotator(): _check_owlv2_annotator("cpu") -def _check_clip_annotator(device: str): +def _check_clip_annotator(device: str, size: str = "base"): url = "https://ultralytics.com/images/bus.jpg" im = Image.open(requests.get(url, stream=True).raw) - annotator = CLIPAnnotator(device=device) + annotator = CLIPAnnotator(device=device, size=size) labels = annotator.annotate_batch([im], ["bus", "people"]) # Check that the labels are lists assert isinstance(labels, list) 
and len(labels) == 1 @@ -70,7 +70,7 @@ def _check_clip_annotator(device: str): not torch.cuda.is_available() or total_disk_space < 15, reason="Test requires GPU and 15GB of HDD", ) -def test_cuda_clip_annotator(): +def test_cuda_clip_base_annotator(): _check_clip_annotator("cuda") @@ -78,5 +78,21 @@ def test_cuda_clip_annotator(): total_disk_space < 15, reason="Test requires at least 15GB of HDD", ) -def test_cpu_clip_annotator(): +def test_cpu_clip_base_annotator(): + _check_clip_annotator("cpu") + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_disk_space < 15, + reason="Test requires GPU and 15GB of HDD", +) +def test_cuda_clip_large_annotator(): + _check_clip_annotator("cuda") + + +@pytest.mark.skipif( + total_disk_space < 15, + reason="Test requires at least 15GB of HDD", +) +def test_cpu_clip_large_annotator(): _check_clip_annotator("cpu") From 26d575a65804ef83d8f9e1f89c4983a6812225f8 Mon Sep 17 00:00:00 2001 From: Jan Cuhel Date: Tue, 19 Mar 2024 14:41:35 +0100 Subject: [PATCH 03/10] Fix image size for large OWLv2 --- .../dataset_annotation/owlv2_annotator.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/datadreamer/dataset_annotation/owlv2_annotator.py b/datadreamer/dataset_annotation/owlv2_annotator.py index 6cbf183..1d4243c 100644 --- a/datadreamer/dataset_annotation/owlv2_annotator.py +++ b/datadreamer/dataset_annotation/owlv2_annotator.py @@ -97,7 +97,8 @@ def _generate_annotations( target_sizes = torch.Tensor(images[0].size[::-1]).repeat((n, 1)).to(self.device) # resize the images to the model's input size - images = [images[i].resize((960, 960)) for i in range(n)] + img_size = (1008, 1008) if self.size == "large" else (960, 960) + images = [images[i].resize(img_size) for i in range(n)] inputs = self.processor( text=batched_prompts, images=images, @@ -283,3 +284,16 @@ def release(self, empty_cuda_cache: bool = False) -> None: if empty_cuda_cache: with torch.no_grad(): torch.cuda.empty_cache() + + 
+if __name__ == "__main__": + import requests + from PIL import Image + + url = "https://ultralytics.com/images/bus.jpg" + im = Image.open(requests.get(url, stream=True).raw) + annotator = OWLv2Annotator(device="cpu", size="large") + final_boxes, final_scores, final_labels = annotator.annotate_batch( + [im], ["robot", "horse"] + ) + annotator.release() From d54f167f954d072b16514475b4d70cedf6e3b17c Mon Sep 17 00:00:00 2001 From: Jan Cuhel Date: Wed, 20 Mar 2024 07:32:19 +0100 Subject: [PATCH 04/10] Update README.md & check args --- README.md | 29 +++++++++++-------- .../generate_dataset_from_scratch.py | 12 +++++--- .../generate_dataset_and_train_yolo.ipynb | 29 +++++++++++-------- 3 files changed, 42 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index c59f83d..5a04bfe 100644 --- a/README.md +++ b/README.md @@ -101,30 +101,35 @@ datadreamer --save_dir --class_names --prompts_number ### Additional Parameters -- `--task`: Choose between `detection` and `classification`. Default is `detection`. +- `--task`: Choose between detection and classification. Default is `detection`. - `--num_objects_range`: Range of objects in a prompt. Default is 1 to 3. - `--prompt_generator`: Choose between `simple`, `lm` (language model) and `tiny` (tiny LM). Default is `simple`. - `--image_generator`: Choose image generator, e.g., `sdxl`, `sdxl-turbo` or `sdxl-lightning`. Default is `sdxl-turbo`. - `--image_annotator`: Specify the image annotator, like `owlv2` for object detection or `clip` for image classification. Default is `owlv2`. -- `--conf_threshold`: Confidence threshold for annotation. Default is 0.15. -- `--use_tta`: Toggle test time augmentation for object detection. Default is True. +- `--conf_threshold`: Confidence threshold for annotation. Default is `0.15`. +- `--annotation_iou_threshold`: Intersection over Union (IoU) threshold for annotation. Default is `0.2`. +- `--prompt_prefix`: Prefix to add to every image generation prompt. Default is `""`. 
+- `--prompt_suffix`: Suffix to add to every image generation prompt, e.g., for adding details like resolution. Default is `hd 8k "highly detailed"`. +- `--negative_prompt`: List of of negative prompts to guide the generation away from certain features. Default is `cartoon "blue skin" painting scrispture golden illustration "worst quality" "low quality" "normal quality:2" "unrealistic dream" "low resolution" static "sd character" "low quality" "low resolution" greyscale monochrome nose cropped lowres "jpeg artifacts" "deformed iris" "deformed pupils" "bad eyes" "semi-realistic worst quality" "bad lips" "deformed mouth" "deformed face" "deformed fingers" "bad anatomy"`. +- `--use_tta`: Toggle test time augmentation for object detection. Default is `True`. - `--synonym_generator`: Enhance class names with synonyms. Default is `none`. Other options are `llm`, `wordnet`. -- `--use_image_tester`: Use image tester for image generation. Default is False. -- `--image_tester_patience`: Patience level for image tester. Default is 1. +- `--use_image_tester`: Use image tester for image generation. Default is `False`. +- `--image_tester_patience`: Patience level for image tester. Default is `1`. - `--lm_quantization`: Quantization to use for Mistral language model. Choose between `none` and `4bit`. Default is `none`. +- `--annotator_size`: Size of the annotator model to use. Choose between `base` and `large`. Default is `base`. - `--batch_size_prompt`: Batch size for prompt generation. Default is 64. -- `--batch_size_annotation`: Batch size for annotation. Default is 8. -- `--batch_size_image`: Batch size for image generation. Default is 1. -- `--device`: Choose between `cuda` and `cpu`. Default is cuda. -- `--seed`: Set a random seed for image and prompt generation. Default is 42. +- `--batch_size_annotation`: Batch size for annotation. Default is `8`. +- `--batch_size_image`: Batch size for image generation. Default is `1`. +- `--device`: Choose between `cuda` and `cpu`. 
Default is `cuda`. +- `--seed`: Set a random seed for image and prompt generation. Default is `42`. diff --git a/datadreamer/pipelines/generate_dataset_from_scratch.py b/datadreamer/pipelines/generate_dataset_from_scratch.py index 6dfb823..098b12d 100644 --- a/datadreamer/pipelines/generate_dataset_from_scratch.py +++ b/datadreamer/pipelines/generate_dataset_from_scratch.py @@ -320,14 +320,18 @@ def check_args(args): raise ValueError("CUDA is not available. Please use --device cpu") # Check negative_prompt - if not args.negative_prompt or any( - not isinstance(name, str) for name in args.negative_prompt + if ( + not args.negative_prompt + or len(args.negative_prompt) < 1 + or any(not isinstance(name, str) for name in args.negative_prompt) ): raise ValueError("--negative_prompt must be a non-empty list of strings") # Check prompt_suffix - if not args.prompt_suffix or any( - not isinstance(name, str) for name in args.prompt_suffix + if ( + not args.prompt_suffix + or len(args.prompt_suffix) < 1 + or any(not isinstance(name, str) for name in args.prompt_suffix) ): raise ValueError("--prompt_suffix must be a non-empty list of strings") diff --git a/examples/generate_dataset_and_train_yolo.ipynb b/examples/generate_dataset_and_train_yolo.ipynb index 5749dfb..73f916a 100644 --- a/examples/generate_dataset_and_train_yolo.ipynb +++ b/examples/generate_dataset_and_train_yolo.ipynb @@ -74,25 +74,30 @@ "source": [ "### Parameters\n", "- `--save_dir` (required): Path to the directory for saving generated images and annotations.\n", - "- `--class_names` (required): Space-separated list of object names for image generation and annotation. Example: person moon robot.\n", - "- `--prompts_number` (optional): Number of prompts to generate for each object. Defaults to 10.\n", - "- `--annotate_only` (optional): Only annotate the images without generating new ones, prompt and image generator will be skipped. 
Defaults to False.\n", - "- `--task`: Choose between `detection` and `classification`. Default is `detection`.\n", + "- `--class_names` (required): Space-separated list of object names for image generation and annotation. Example: `person moon robot`.\n", + "- `--prompts_number` (optional): Number of prompts to generate for each object. Defaults to `10`.\n", + "- `--annotate_only` (optional): Only annotate the images without generating new ones, prompt and image generator will be skipped. Defaults to `False`.\n", + "- `--task`: Choose between detection and classification. Default is `detection`.\n", "- `--num_objects_range`: Range of objects in a prompt. Default is 1 to 3.\n", "- `--prompt_generator`: Choose between `simple`, `lm` (language model) and `tiny` (tiny LM). Default is `simple`.\n", "- `--image_generator`: Choose image generator, e.g., `sdxl`, `sdxl-turbo` or `sdxl-lightning`. Default is `sdxl-turbo`.\n", "- `--image_annotator`: Specify the image annotator, like `owlv2` for object detection or `clip` for image classification. Default is `owlv2`.\n", - "- `--conf_threshold`: Confidence threshold for annotation. Default is 0.15.\n", - "- `--use_tta`: Toggle test time augmentation for object detection. Default is True.\n", + "- `--conf_threshold`: Confidence threshold for annotation. Default is `0.15`.\n", + "- `--annotation_iou_threshold`: Intersection over Union (IoU) threshold for annotation. Default is `0.2`.\n", + "- `--prompt_prefix`: Prefix to add to every image generation prompt. Default is `\"\"`.\n", + "- `--prompt_suffix`: Suffix to add to every image generation prompt, e.g., for adding details like resolution. Default is `hd 8k \"highly detailed\"`.\n", + "- `--negative_prompt`: List of of negative prompts to guide the generation away from certain features. 
Default is `cartoon \"blue skin\" painting scrispture golden illustration \"worst quality\" \"low quality\" \"normal quality:2\" \"unrealistic dream\" \"low resolution\" static \"sd character\" \"low quality\" \"low resolution\" greyscale monochrome nose cropped lowres \"jpeg artifacts\" \"deformed iris\" \"deformed pupils\" \"bad eyes\" \"semi-realistic worst quality\" \"bad lips\" \"deformed mouth\" \"deformed face\" \"deformed fingers\" \"bad anatomy\"`.\n", + "- `--use_tta`: Toggle test time augmentation for object detection. Default is `True`.\n", "- `--synonym_generator`: Enhance class names with synonyms. Default is `none`. Other options are `llm`, `wordnet`.\n", - "- `--use_image_tester`: Use image tester for image generation. Default is False.\n", - "- `--image_tester_patience`: Patience level for image tester. Default is 1.\n", + "- `--use_image_tester`: Use image tester for image generation. Default is `False`.\n", + "- `--image_tester_patience`: Patience level for image tester. Default is `1`.\n", "- `--lm_quantization`: Quantization to use for Mistral language model. Choose between `none` and `4bit`. Default is `none`.\n", + "- `--annotator_size`: Size of the annotator model to use. Choose between `base` and `large`. Default is `base`.\n", "- `--batch_size_prompt`: Batch size for prompt generation. Default is 64.\n", - "- `--batch_size_annotation`: Batch size for annotation. Default is 8.\n", - "- `--batch_size_image`: Batch size for image generation. Default is 1.\n", - "- `--device`: Choose between `cuda` and `cpu`. Default is cuda.\n", - "- `--seed`: Set a random seed for image and prompt generation. Default is 42.\n" + "- `--batch_size_annotation`: Batch size for annotation. Default is `8`.\n", + "- `--batch_size_image`: Batch size for image generation. Default is `1`.\n", + "- `--device`: Choose between `cuda` and `cpu`. Default is `cuda`.\n", + "- `--seed`: Set a random seed for image and prompt generation. 
Default is `42`.\n" ] }, { From 9cb50f89a5df98b471d77aef4545364f3bde4000 Mon Sep 17 00:00:00 2001 From: Jan Cuhel Date: Sat, 23 Mar 2024 10:41:40 +0100 Subject: [PATCH 05/10] Correct clf unittests --- tests/integration/test_pipeline.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/integration/test_pipeline.py b/tests/integration/test_pipeline.py index ed8ad02..dcdff92 100644 --- a/tests/integration/test_pipeline.py +++ b/tests/integration/test_pipeline.py @@ -681,6 +681,7 @@ def test_cpu_simple_sdxl_turbo_classification_pipeline(): f"--prompts_number 1 " f"--prompt_generator simple " f"--num_objects_range 1 2 " + f"--image_annotator clip " f"--image_generator sdxl-turbo " f"--use_image_tester " f"--device cpu" @@ -704,6 +705,7 @@ def test_cuda_simple_sdxl_turbo_classification_pipeline(): f"--prompts_number 1 " f"--prompt_generator simple " f"--num_objects_range 1 2 " + f"--image_annotator clip " f"--image_generator sdxl-turbo " f"--use_image_tester " f"--device cuda" @@ -728,6 +730,7 @@ def test_cuda_simple_llm_synonym_sdxl_turbo_classification_pipeline(): f"--prompt_generator simple " f"--num_objects_range 1 2 " f"--image_generator sdxl-turbo " + f"--image_annotator clip " f"--use_image_tester " f"--synonym_generator llm " f"--device cuda" @@ -751,6 +754,7 @@ def test_cuda_simple_wordnet_synonym_sdxl_turbo_classification_pipeline(): f"--prompts_number 1 " f"--prompt_generator simple " f"--num_objects_range 1 2 " + f"--image_annotator clip " f"--image_generator sdxl-turbo " f"--use_image_tester " f"--synonym_generator wordnet " @@ -774,6 +778,7 @@ def test_cpu_simple_sdxl_classification_pipeline(): f"--class_names alien mars cat " f"--prompts_number 1 " f"--prompt_generator simple " + f"--image_annotator clip " f"--num_objects_range 1 2 " f"--image_generator sdxl " f"--use_image_tester " @@ -797,6 +802,7 @@ def test_cuda_simple_sdxl_classification_pipeline(): f"--class_names alien mars cat " f"--prompts_number 1 " f"--prompt_generator 
simple " + f"--image_annotator clip " f"--num_objects_range 1 2 " f"--image_generator sdxl " f"--use_image_tester " @@ -824,6 +830,7 @@ def test_cpu_lm_sdxl_turbo_classification_pipeline(): f"--prompts_number 1 " f"--prompt_generator lm " f"--num_objects_range 1 2 " + f"--image_annotator clip " f"--image_generator sdxl-turbo " f"--use_image_tester " f"--device cpu" @@ -847,6 +854,7 @@ def test_cuda_lm_sdxl_turbo_classification_pipeline(): f"--prompts_number 1 " f"--prompt_generator lm " f"--num_objects_range 1 2 " + f"--image_annotator clip " f"--image_generator sdxl-turbo " f"--use_image_tester " f"--device cuda" @@ -870,6 +878,7 @@ def test_cuda_4bit_lm_sdxl_turbo_classification_pipeline(): f"--prompts_number 1 " f"--prompt_generator lm " f"--num_objects_range 1 2 " + f"--image_annotator clip " f"--image_generator sdxl-turbo " f"--use_image_tester " f"--lm_quantization 4bit " @@ -893,6 +902,7 @@ def test_cpu_lm_sdxl_classification_pipeline(): f"--class_names alien mars cat " f"--prompts_number 1 " f"--prompt_generator lm " + f"--image_annotator clip " f"--num_objects_range 1 2 " f"--image_generator sdxl " f"--use_image_tester " @@ -916,6 +926,7 @@ def test_cuda_lm_sdxl_classification_pipeline(): f"--class_names alien mars cat " f"--prompts_number 1 " f"--prompt_generator lm " + f"--image_annotator clip " f"--num_objects_range 1 2 " f"--image_generator sdxl " f"--use_image_tester " @@ -940,6 +951,7 @@ def test_cuda_4bit_lm_sdxl_classification_pipeline(): f"--prompts_number 1 " f"--prompt_generator lm " f"--num_objects_range 1 2 " + f"--image_annotator clip " f"--image_generator sdxl " f"--use_image_tester " f"--lm_quantization 4bit " @@ -966,6 +978,7 @@ def test_cpu_tiny_sdxl_turbo_classification_pipeline(): f"--class_names alien mars cat " f"--prompts_number 1 " f"--prompt_generator tiny " + f"--image_annotator clip " f"--num_objects_range 1 2 " f"--image_generator sdxl-turbo " f"--use_image_tester " @@ -990,6 +1003,7 @@ def 
test_cuda_tiny_sdxl_turbo_classification_pipeline(): f"--prompts_number 1 " f"--prompt_generator tiny " f"--num_objects_range 1 2 " + f"--image_annotator clip " f"--image_generator sdxl-turbo " f"--use_image_tester " f"--device cuda" @@ -1013,6 +1027,7 @@ def test_cpu_tiny_sdxl_classification_pipeline(): f"--prompts_number 1 " f"--prompt_generator tiny " f"--num_objects_range 1 2 " + f"--image_annotator clip " f"--image_generator sdxl " f"--use_image_tester " f"--device cpu" @@ -1036,6 +1051,7 @@ def test_cuda_tiny_sdxl_classification_pipeline(): f"--prompts_number 1 " f"--prompt_generator tiny " f"--num_objects_range 1 2 " + f"--image_annotator clip " f"--image_generator sdxl " f"--use_image_tester " f"--device cuda" From 6be033e7640a153ddaa8a1719bb5c4a22d86511d Mon Sep 17 00:00:00 2001 From: Jan Cuhel Date: Sat, 23 Mar 2024 11:52:01 +0100 Subject: [PATCH 06/10] Fix unittests for negative prompt & prompt suffix --- .../pipelines/generate_dataset_from_scratch.py | 16 ++++++---------- tests/integration/test_pipeline.py | 4 ++-- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/datadreamer/pipelines/generate_dataset_from_scratch.py b/datadreamer/pipelines/generate_dataset_from_scratch.py index 098b12d..6e759ef 100644 --- a/datadreamer/pipelines/generate_dataset_from_scratch.py +++ b/datadreamer/pipelines/generate_dataset_from_scratch.py @@ -320,20 +320,16 @@ def check_args(args): raise ValueError("CUDA is not available. 
Please use --device cpu") # Check negative_prompt - if ( - not args.negative_prompt - or len(args.negative_prompt) < 1 - or any(not isinstance(name, str) for name in args.negative_prompt) + if not args.negative_prompt or any( + not isinstance(name, str) for name in args.negative_prompt ): - raise ValueError("--negative_prompt must be a non-empty list of strings") + raise ValueError("--negative_prompt must be a list of strings") # Check prompt_suffix - if ( - not args.prompt_suffix - or len(args.prompt_suffix) < 1 - or any(not isinstance(name, str) for name in args.prompt_suffix) + if not args.prompt_suffix or any( + not isinstance(name, str) for name in args.prompt_suffix ): - raise ValueError("--prompt_suffix must be a non-empty list of strings") + raise ValueError("--prompt_suffix must be a list of strings") # Check for LM quantization availability if args.lm_quantization != "none" and ( diff --git a/tests/integration/test_pipeline.py b/tests/integration/test_pipeline.py index dcdff92..8382e5a 100644 --- a/tests/integration/test_pipeline.py +++ b/tests/integration/test_pipeline.py @@ -136,13 +136,13 @@ def test_empty_class_names(): def test_empty_negative_prompt(): # Define the cmd - cmd = "datadreamer --negative_prompt []" + cmd = "datadreamer --negative_prompt --device cpu" _check_wrong_value(cmd) def test_empty_prompt_suffix(): # Define the cmd - cmd = "datadreamer --prompt_suffix []" + cmd = "datadreamer --prompt_suffix --device cpu" _check_wrong_value(cmd) From 9b458cfe359e0988d0c58d0bc43fdec973a766f9 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Sun, 24 Mar 2024 18:23:19 +0000 Subject: [PATCH 07/10] [Automated] Updated coverage badge --- media/coverage_badge.svg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index 9d027c7..4f8c185 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -15,7 +15,7 @@ coverage coverage - 49% - 49% + 50% + 50% From 
6bd4642b6d5939ad72b39e8f708cf726b4e0e584 Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin Date: Sun, 24 Mar 2024 20:25:19 +0000 Subject: [PATCH 08/10] fix: change default value of generated synonyms to 3 --- datadreamer/prompt_generation/lm_synonym_generator.py | 2 +- datadreamer/prompt_generation/synonym_generator.py | 2 +- datadreamer/prompt_generation/wordnet_synonym_generator.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/datadreamer/prompt_generation/lm_synonym_generator.py b/datadreamer/prompt_generation/lm_synonym_generator.py index e1c4401..fc86db8 100644 --- a/datadreamer/prompt_generation/lm_synonym_generator.py +++ b/datadreamer/prompt_generation/lm_synonym_generator.py @@ -34,7 +34,7 @@ class LMSynonymGenerator(SynonymGenerator): def __init__( self, - synonyms_number: int = 5, + synonyms_number: int = 3, seed: Optional[float] = 42, device: str = "cuda", ) -> None: diff --git a/datadreamer/prompt_generation/synonym_generator.py b/datadreamer/prompt_generation/synonym_generator.py index b6a95e1..ec3f306 100644 --- a/datadreamer/prompt_generation/synonym_generator.py +++ b/datadreamer/prompt_generation/synonym_generator.py @@ -29,7 +29,7 @@ class SynonymGenerator(ABC): def __init__( self, - synonyms_number: int = 5, + synonyms_number: int = 3, seed: Optional[float] = 42, device: str = "cuda", ) -> None: diff --git a/datadreamer/prompt_generation/wordnet_synonym_generator.py b/datadreamer/prompt_generation/wordnet_synonym_generator.py index 4f8f6f8..994e74d 100644 --- a/datadreamer/prompt_generation/wordnet_synonym_generator.py +++ b/datadreamer/prompt_generation/wordnet_synonym_generator.py @@ -25,7 +25,7 @@ class WordNetSynonymGenerator(SynonymGenerator): def __init__( self, - synonyms_number: int = 5, + synonyms_number: int = 3, seed: Optional[float] = 42, device: str = "cuda", ) -> None: From 52f6426461621001488218e1ce9279aa4c95ec48 Mon Sep 17 00:00:00 2001 From: Jan Cuhel Date: Mon, 25 Mar 2024 22:17:49 +0100 Subject: [PATCH 
09/10] Update negative prompt description --- README.md | 2 +- examples/generate_dataset_and_train_yolo.ipynb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5a04bfe..04df8c9 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,7 @@ datadreamer --save_dir --class_names --prompts_number Date: Tue, 26 Mar 2024 17:32:13 +0100 Subject: [PATCH 10/10] Change --negative_prompt & --prompt_suffix args to string --- README.md | 4 +- .../generate_dataset_from_scratch.py | 59 ++----------------- .../generate_dataset_and_train_yolo.ipynb | 4 +- tests/integration/test_pipeline.py | 12 ---- 4 files changed, 9 insertions(+), 70 deletions(-) diff --git a/README.md b/README.md index 04df8c9..0f5c9c9 100644 --- a/README.md +++ b/README.md @@ -117,8 +117,8 @@ datadreamer --save_dir --class_names --prompts_number