diff --git a/skills/venice_image/README.md b/skills/venice_image/README.md new file mode 100644 index 00000000..913ea750 --- /dev/null +++ b/skills/venice_image/README.md @@ -0,0 +1,119 @@ +# Venice Image Skill Suite + +Venice Image is a comprehensive skill suite for intelligent agents, enabling state-of-the-art AI image generation, enhancement, upscaling, and vision analysis using the [Venice AI API](https://venice.ai/). This suite offers a modular interface: each sub-tool covers a focused aspect of visual intelligence, while sharing unified configuration and error handling. + +--- + +## Features + +### 1. **Image Generation** +Prompt-based creation of new artworks or photorealistic images, with support for multiple leading AI models, extensive style presets, and negative prompting. Models include: +- **Fluently XL** (realism, professional art) +- **Flux Dev** (innovative research, art workflows) +- **Lustify SDXL** (photorealistic, NSFW/SFW) +- **Pony Realism** (anime/character detail, Danbooru tags) +- **Venice SD35 / Stable Diffusion 3.5** (Stability AI, creative design) + +### 2. **Image Enhancement** +Stylize or refine *existing* images without changing their resolution—ideal for artistic edits, restoration, or visual polishing. + +### 3. **Image Upscaling** +Increase resolution by 2x or 4x while preserving essential details (with optional noise/replication settings). Great for preparing web images for print or HD use. + +### 4. **Image Vision** +Obtain highly detailed, context-rich textual descriptions of images—useful for content understanding, accessibility, indexing, or cognitive agents. + +--- + +## How It Works + +- Tools call the Venice API via secure network requests, automatically handling authentication, rate limiting, and error management. +- Any generated or processed images are transparently stored in an object store (S3 or compatible), with returned URLs ready for user consumption. +- Unified logging and troubleshooting: every tool shares a robust diagnostic backbone for consistent developer experience. + +--- + +## Setup and Configuration + +All skills require a **Venice API key** for operation. + +### Required Configuration +- `enabled` *(bool)*: Enable or disable the overall skill suite. +- `api_key` *(string, sensitive)*: Your [Venice AI API key](https://venice.ai/). +- `states`: Enable/disable and set visibility for each sub-tool (public/private/disabled). + +### Advanced Options +- `safe_mode` *(bool, default: true)*: If true, blurs images classified as adult/NSFW. +- `hide_watermark` *(bool, default: true)*: Request images without a Venice watermark (subject to Venice policy). +- `embed_exif_metadata` *(bool, default: false)*: Whether to embed prompt/config info in EXIF metadata. +- `negative_prompt` *(string)*: Default negative prompt, e.g. `(worst quality: 1.4), bad quality, nsfw`. +- `rate_limit_number` / `rate_limit_minutes`: (optional) Set a max request rate per agent. + +For per-tool configuration, refer to the `states` section in [schema.json](./schema.json): +- Each tool (e.g. `image_generation_flux_dev`, `image_enhance`, etc.) can be set to `"public"` (all users), `"private"` (agent owner only), or `"disabled"` (hidden). 
+ +#### Example (YAML/JSON-like) +```json +{ + "enabled": true, + "api_key": "", + "safe_mode": true, + "states": { + "image_vision": "public", + "image_enhance": "private", + "image_upscale": "disabled", + "image_generation_flux_dev": "public" + } +} +``` + +--- + +## Usage Patterns + +Each sub-tool has its own standardized input: +- URL-based tools (`image_enhance`, `image_upscale`, `image_vision`) require a web-accessible image URL. +- Generation tools require a *prompt* and offer flexible parameters (size, style, negative prompt, etc). + +Errors and troubleshooting info are always returned in a structured dictionary, with clear separation of success and error fields. + +--- + +## Output and Storage + +- All generated/processed images are written to S3-compatible storage using a SHA256-based unique key. +- Returned URLs are agent-accessible and stable. +- For Vision and non-binary results, the output is returned inline as a dictionary. + +--- + +## Security, License & Compliance + +- Your Venice API key is required and kept confidential per config practices. +- Generated images and tool usage are subject to [Venice AI Terms of Service](https://venice.ai/) and the terms of the respective models (e.g. Stability AI, Black Forest Labs). +- Agents should implement their own access and moderation layers; Safe Mode and watermarking are best-effort. + +--- + +## Included Sub-Tools + +_(For detailed docs, see the respective sub-tool README entries.)_ + +- image_generation_fluently_xl +- image_generation_flux_dev +- image_generation_flux_dev_uncensored +- image_generation_lustify_sdxl +- image_generation_pony_realism +- image_generation_venice_sd35 +- image_generation_stable_diffusion_3_5 +- image_enhance +- image_upscale +- image_vision + +--- + +## Contributing & Support + +For issues, bugfixes, or requests, please open a GitHub issue or contact the maintainers. This suite is regularly updated as Venice AI evolves. 
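---

## Quick Start (Programmatic)

For agents that load the suite directly rather than through a platform loader, the package exposes an async `get_skills` function (defined in `skills/venice_image/__init__.py`). The sketch below shows one way it could be called; it assumes a `SkillStoreABC` implementation is already available as `store`, and the API key value is a placeholder.

```python
import asyncio

from skills.venice_image import get_skills

config = {
    "enabled": True,
    "api_key": "<your-venice-api-key>",  # placeholder, not a real key
    "states": {
        "image_vision": "public",
        "image_generation_flux_dev": "public",
    },
}


async def main():
    # Only sub-tools whose state permits them at this privacy level are returned.
    skills = await get_skills(config, is_private=False, store=store)
    for skill in skills:
        print(skill.name)


asyncio.run(main())
```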
+ +--- diff --git a/skills/venice_image/__init__.py b/skills/venice_image/__init__.py index 75655835..33c13594 100644 --- a/skills/venice_image/__init__.py +++ b/skills/venice_image/__init__.py @@ -1,27 +1,38 @@ -# venice_image/__init__.py - import logging -from typing import List, NotRequired, Optional, TypedDict +from typing import NotRequired, Optional, TypedDict from abstracts.skill import SkillStoreABC from skills.base import ( SkillConfig, SkillState, -) # Assuming SkillState is like Literal["disabled", "public", "private"] +) # Import the base tool and all specific model skill classes from skills.venice_image.base import VeniceImageBaseTool -from skills.venice_image.image_generation_fluently_xl import ImageGenerationFluentlyXL -from skills.venice_image.image_generation_flux_dev import ImageGenerationFluxDev -from skills.venice_image.image_generation_flux_dev_uncensored import ( +from skills.venice_image.image_enhance.image_enhance import ImageEnhance +from skills.venice_image.image_generation.image_generation_fluently_xl import ( + ImageGenerationFluentlyXL, +) +from skills.venice_image.image_generation.image_generation_flux_dev import ( + ImageGenerationFluxDev, +) +from skills.venice_image.image_generation.image_generation_flux_dev_uncensored import ( ImageGenerationFluxDevUncensored, ) -from skills.venice_image.image_generation_lustify_sdxl import ImageGenerationLustifySDXL -from skills.venice_image.image_generation_pony_realism import ImageGenerationPonyRealism -from skills.venice_image.image_generation_stable_diffusion_3_5 import ( +from skills.venice_image.image_generation.image_generation_lustify_sdxl import ( + ImageGenerationLustifySDXL, +) +from skills.venice_image.image_generation.image_generation_pony_realism import ( + ImageGenerationPonyRealism, +) +from skills.venice_image.image_generation.image_generation_stable_diffusion_3_5 import ( ImageGenerationStableDiffusion35, ) -from skills.venice_image.image_generation_venice_sd35 import ImageGenerationVeniceSD35 +from skills.venice_image.image_generation.image_generation_venice_sd35 import ( + ImageGenerationVeniceSD35, +) +from skills.venice_image.image_upscale.image_upscale import ImageUpscale +from skills.venice_image.image_vision.image_vision import ImageVision # Cache skills at the system level, because they are stateless and only depend on the store _cache: dict[str, VeniceImageBaseTool] = {} @@ -31,6 +42,9 @@ # Define the expected structure for the 'states' dictionary in the config class SkillStates(TypedDict): + image_upscale: SkillState + image_enhance: SkillState + image_vision: SkillState image_generation_flux_dev: SkillState image_generation_flux_dev_uncensored: SkillState image_generation_venice_sd35: SkillState @@ -47,6 +61,7 @@ class Config(SkillConfig): enabled: bool # Keep standard enabled flag states: SkillStates + api_key_provider: str = "agent_owner" api_key: NotRequired[Optional[str]] # Explicitly Optional safe_mode: NotRequired[bool] # Defaults handled in base or usage hide_watermark: NotRequired[bool] # Defaults handled in base or usage @@ -55,8 +70,10 @@ class Config(SkillConfig): rate_limit_minutes: NotRequired[Optional[int]] # Explicitly Optional -# Map skill names to their corresponding classes for the factory function -_SKILL_NAME_TO_CLASS_MAP = { +_SKILL_NAME_TO_CLASS_MAP: dict[str, type[VeniceImageBaseTool]] = { + "image_upscale": ImageUpscale, + "image_enhance": ImageEnhance, + "image_vision": ImageVision, "image_generation_flux_dev": ImageGenerationFluxDev, 
"image_generation_flux_dev_uncensored": ImageGenerationFluxDevUncensored, "image_generation_venice_sd35": ImageGenerationVeniceSD35, @@ -64,16 +81,15 @@ class Config(SkillConfig): "image_generation_lustify_sdxl": ImageGenerationLustifySDXL, "image_generation_pony_realism": ImageGenerationPonyRealism, "image_generation_stable_diffusion_3_5": ImageGenerationStableDiffusion35, - # Add new mappings here: "skill_name": SkillClassName } async def get_skills( - config: Config, # Use the specific Config TypedDict for better type hinting + config: "Config", is_private: bool, store: SkillStoreABC, **_, # Allow for extra arguments if the loader passes them -) -> List[VeniceImageBaseTool]: +) -> list[VeniceImageBaseTool]: """Get all enabled Venice Image skills based on configuration and privacy level. Args: @@ -88,32 +104,28 @@ async def get_skills( if not config.get("enabled", False): return [] - available_skills: List[VeniceImageBaseTool] = [] - skill_states = config.get("states", {}) - - # Iterate through all known skills defined in the map - for skill_name in _SKILL_NAME_TO_CLASS_MAP: - state = skill_states.get( - skill_name, "disabled" - ) # Default to disabled if not in config + available_skills: list[VeniceImageBaseTool] = [] + # Include skills based on their state + for skill_name, state in config["states"].items(): if state == "disabled": continue elif state == "public" or (state == "private" and is_private): - # If enabled, get the skill instance using the factory function - skill_instance = get_venice_image_skill(skill_name, store) - if skill_instance: - available_skills.append(skill_instance) - else: - # This case should ideally not happen if the map is correct - logger.warning(f"Could not instantiate known skill: {skill_name}") + available_skills.append(skill_name) - return available_skills + # Get each skill using the cached getter + result = [] + for name in available_skills: + skill = get_venice_image_skill(name, store, config) + if skill: + result.append(skill) + return result def get_venice_image_skill( name: str, store: SkillStoreABC, + config: "Config", ) -> Optional[VeniceImageBaseTool]: """ Factory function to get a cached Venice Image skill instance by name. @@ -125,25 +137,18 @@ def get_venice_image_skill( Returns: The requested Venice Image skill instance, or None if the name is unknown. """ - # Check cache first + + # Return from cache immediately if already exists if name in _cache: return _cache[name] - # Get the class from the map skill_class = _SKILL_NAME_TO_CLASS_MAP.get(name) - - if skill_class: - try: - # Instantiate the skill and add to cache - instance = skill_class(skill_store=store) - _cache[name] = instance - return instance - except Exception as e: - logger.error( - f"Failed to instantiate Venice Image skill '{name}': {e}", exc_info=True - ) - return None # Failed to instantiate - else: - # This handles cases where a name might be in config but not in our map - logger.warning(f"Attempted to get unknown Venice Image skill: {name}") + if not skill_class: + logger.warning(f"Unknown Venice skill: {name}") return None + + # Cache and return the newly created instance + _cache[name] = skill_class( + skill_store=store, + ) + return _cache[name] diff --git a/skills/venice_image/api.py b/skills/venice_image/api.py new file mode 100644 index 00000000..2bc7bc2e --- /dev/null +++ b/skills/venice_image/api.py @@ -0,0 +1,138 @@ +""" +This module encapsulates all interactions with the Venice AI API. 
+It provides a function, make_venice_api_request, to make POST requests +to the API and handles the responses, including error handling, +content type checking, and image storage via S3. This separation +of concerns keeps the core skill logic cleaner and easier to maintain. +""" + +import hashlib +import logging +from typing import Any, Dict, Optional, Tuple + +import httpx + +from utils.s3 import store_image_bytes + +logger = logging.getLogger(__name__) + + +async def make_venice_api_request( + api_key: str, + path: str, + payload: Dict[str, Any], + category: str, + tool_name: str, +) -> Tuple[Dict[str, Any], Optional[Dict[str, Any]]]: + """ + Makes a POST request to the Venice AI API, handling all aspects + of the API interaction. This includes: + + 1. Constructing the API URL using a base URL and the provided path. + 2. Adding the required authorization header with the provided API key. + 3. Sending the POST request with the given payload. + 4. Handling potential connection and HTTP errors. + 5. Calling the internal _handle_response function to process the API's + response, which might be JSON or an image. + + Args: + api_key: The Venice AI API key for authentication. + path: The API endpoint path (e.g., "/api/v1/image/generate"). Should *not* start with the base URL. + payload: The data to send in the request body (as JSON). + category: The category of the skill making the request (e.g., "venice_image"). Used for S3 storage and logging purpose. + tool_name: The name of the tool or skill making the request (e.g., "image_generation"). Used for S3 storage and logging purpose. + + Returns: + A tuple: (success_data, error_data). + - success_data: A dictionary containing the parsed JSON response from the API if the request was successful + (or a dictionary containing the S3 URL if the response is an image). + - error_data: A dictionary containing information about any errors that occurred, + or None if the request was successful. The dictionary includes an 'error' key. + """ + + venice_base_url = "https://api.venice.ai" # Venice AI API base URL + + if not path.startswith("/"): + path = "/" + path + + api_url = f"{venice_base_url}{path}" + + headers = { + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + "Accept": "image/*, application/json", + } + + logger.info( + f"[{category}/{tool_name}] Sending request to {api_url} with payload: {payload}" + ) + + try: + async with httpx.AsyncClient(timeout=180.0) as client: + response = await client.post(api_url, json=payload, headers=headers) + return await _handle_response(response, category, tool_name) + + except httpx.RequestError as e: + error_msg = f"Connection error: {e}" + logger.error(f"[{category}/{tool_name}] {error_msg}") + return {}, {"success": False, "error": error_msg} + except Exception as e: + error_msg = f"Unexpected error: {e}" + logger.error(f"[{category}/{tool_name}] {error_msg}", exc_info=True) + return {}, {"success": False, "error": error_msg} + + +async def _handle_response( + response: httpx.Response, category: str, tool_name: str +) -> Tuple[Dict[str, Any], Optional[Dict[str, Any]]]: + """ + Handles the API response, differentiating between JSON and image responses. + + If the response is an image (based on the 'content-type' header), + it stores the image in S3 and returns the S3 URL. + If the response is JSON, it parses the JSON and returns it. + If any errors occur, it returns an error dictionary. 
+ """ + + content_type = str(response.headers.get("content-type", "")).lower() + + if response.status_code == 200 and content_type.startswith("image/"): + try: + upscaled_image_bytes = response.content + image_hash = hashlib.sha256(upscaled_image_bytes).hexdigest() + file_extension = content_type.split("/")[-1].split("+")[0] or "png" + + key = f"{category}/{tool_name}/{image_hash}.{file_extension}" + + logger.info(f"[{category}/{tool_name}] Storing image with key: {key}") + + stored_url = await store_image_bytes( + upscaled_image_bytes, key, content_type=content_type + ) + + return {"success": True, "result": stored_url}, None + + except Exception as e: + error_msg = f"Error processing image response: {e}" + logger.error(f"[{category}/{tool_name}] {error_msg}", exc_info=True) + return {}, {"success": False, "error": error_msg} + + elif response.status_code == 200: + try: + logger.info(f"[{category}/{tool_name}] Received successful JSON response.") + return response.json(), None + except Exception as json_err: + error_msg = f"Failed to parse JSON response: {json_err} - {response.text}" + logger.error(f"[{category}/{tool_name}] {error_msg}") + return {}, {"success": False, "error": error_msg} + + else: + try: + error_data = response.json() + error_msg = f"API returned error: {error_data.get('message', error_data.get('detail', response.text))}" + logger.error(f"[{category}/{tool_name}] {error_msg}") + return {}, {"success": False, "error": error_msg} + except Exception: + error_msg = f"API returned status code {response.status_code} with text: {response.text}" + logger.error(f"[{category}/{tool_name}] {error_msg}") + return {}, {"success": False, "error": error_msg} diff --git a/skills/venice_image/base.py b/skills/venice_image/base.py index 01b76d47..d7e3ae45 100644 --- a/skills/venice_image/base.py +++ b/skills/venice_image/base.py @@ -1,259 +1,183 @@ -# venice_image/base.py -import hashlib import logging -from typing import Optional, Type +from typing import Any, Dict, Optional, Tuple -import httpx -from langchain_core.runnables import RunnableConfig -from pydantic import BaseModel, Field +from pydantic import Field -from abstracts.skill import SkillStoreABC -from skills.base import IntentKitSkill, SkillContext -from skills.venice_image.input import ( - VeniceImageGenerationInput, -) # Import the shared input schema - -# Ensure this import path is correct for your project structure -# Might be from ..utils.s3 or similar depending on your layout -from utils.s3 import store_image_bytes +from skills.base import IntentKitSkill, SkillContext, SkillStoreABC, ToolException +from skills.venice_image.api import ( + make_venice_api_request, +) +from skills.venice_image.config import VeniceImageConfig logger = logging.getLogger(__name__) -base_url = "https://api.venice.ai" +venice_base_url = "https://api.venice.ai" # Common base URL for all Venice endpoints class VeniceImageBaseTool(IntentKitSkill): - """Base class for Venice Image generation tools.""" + """ + Base class for all Venice AI image-related skills. - # --- Attributes Subclasses MUST Define --- - name: str = Field(description="The unique name of the tool/model.") - description: str = Field(description="A description of what the tool/model does.") - model_id: str = Field( - description="The specific model ID used in the Venice API call." 
- ) - # --- Shared Attributes --- - args_schema: Type[BaseModel] = ( - VeniceImageGenerationInput # Use the shared input schema - ) - skill_store: SkillStoreABC = Field( - description="The skill store for persisting data" - ) + This class provides common functionality for interacting with the + Venice AI API, including: - def get_api_key(self, context: SkillContext) -> Optional[str]: - """Get the API key, prioritizing agent config then system config.""" - # Check agent config first - agent_api_key = context.config.get("api_key") - if agent_api_key: - logger.debug(f"Using agent-specific Venice API key for skill {self.name}") - return agent_api_key - - # Fallback to system config - system_api_key = self.skill_store.get_system_config("venice_api_key") - if system_api_key: - logger.debug(f"Using system Venice API key for skill {self.name}") - return system_api_key - - logger.warning( - f"No Venice API key found in agent or system config for skill {self.name}" - ) - return None + - Retrieving the API key (from agent or system configuration). + - Applying rate limits to prevent overuse of the API. + - A standardized `post` method for making API requests. + + Subclasses should inherit from this class and implement their specific + API interactions (e.g., image generation, upscaling, inpainting) + by defining their own `_arun` methods and setting appropriate `name` + and `description` attributes. + """ @property def category(self) -> str: + """ + Returns the category of this skill, used for configuration and logging. + """ return "venice_image" - async def _arun( - self, - prompt: str, - negative_prompt: Optional[str] = None, - width: Optional[int] = 1024, - height: Optional[int] = 1024, - style_preset: Optional[str] = "Photographic", - config: RunnableConfig = None, - **kwargs, - ) -> str: + skill_store: SkillStoreABC = Field( + description="The skill store for persisting data and configs." + ) + + def getSkillConfig(self, context: SkillContext) -> VeniceImageConfig: """ - Core implementation to generate images using a specified Venice AI model. - This method is inherited by subclasses and uses self.model_id. + Creates a VeniceImageConfig instance from a dictionary of configuration values. + + Args: + config: A dictionary containing configuration settings. + + Returns: + A VeniceImageConfig object. """ - context = self.context_from_config(config) - skill_config = context.config # Agent-specific config for this skill category - # --- Configuration and Setup --- - api_key = self.get_api_key(context) - if not api_key: - logger.error(f"Venice AI API key not found for skill '{self.name}'") - raise ValueError( - "Venice AI API key not found. Please configure it in system or agent settings." 
- ) - - rate_limit_number = skill_config.get("rate_limit_number") - rate_limit_minutes = skill_config.get("rate_limit_minutes") - # Default safe_mode to False as per schema - safe_mode = skill_config.get("safe_mode", True) - hide_watermark = skill_config.get("hide_watermark", True) - default_negative_prompt = skill_config.get( - "negative_prompt", "(worst quality: 1.4), bad quality, nsfw" + return VeniceImageConfig( + api_key_provider=context.config.get("api_key_provider", "agent_owner"), + safe_mode=context.config.get("safe_mode", True), + hide_watermark=context.config.get("hide_watermark", True), + embed_exif_metadata=context.config.get("embed_exif_metadata", False), + negative_prompt=context.config.get( + "negative_prompt", "(worst quality: 1.4), bad quality, nsfw" + ), + rate_limit_number=context.config.get("rate_limit_number"), + rate_limit_minutes=context.config.get("rate_limit_minutes"), ) - # Apply rate limiting - using_agent_key = "api_key" in skill_config and skill_config["api_key"] - if using_agent_key and rate_limit_number and rate_limit_minutes: - logger.debug( - f"Applying agent rate limit ({rate_limit_number}/{rate_limit_minutes} min) for user {context.user_id} on skill {self.name}" - ) - await self.user_rate_limit_by_category( - context.user_id, rate_limit_number, rate_limit_minutes - ) - elif not using_agent_key: - # Try to get system rate limits if defined, otherwise use hardcoded defaults - sys_rate_limit_num = self.skill_store.get_system_config( - "venice_rate_limit_number", 10 - ) # Example: Default 10 - sys_rate_limit_min = self.skill_store.get_system_config( - "venice_rate_limit_minutes", 1440 - ) # Example: Default 1 day (1440 min) - if sys_rate_limit_num and sys_rate_limit_min: - logger.debug( - f"Applying system rate limit ({sys_rate_limit_num}/{sys_rate_limit_min} min) for user {context.user_id} on skill {self.name}" + def get_api_key(self, context: SkillContext) -> str: + """ + Retrieves the Venice AI API key based on the api_key_provider setting. + + Returns: + The API key if found. + + Raises: + ToolException: If the API key is not found or provider is invalid. + """ + try: + skillConfig = self.getSkillConfig(context=context) + if skillConfig.api_key_provider == "agent_owner": + agent_api_key = context.config.get("api_key") + if agent_api_key: + logger.debug( + f"Using agent-specific Venice API key for skill {self.name} in category {self.category}" + ) + return agent_api_key + raise ToolException( + f"No agent-owned Venice API key found for skill '{self.name}' in category '{self.category}'." ) - await self.user_rate_limit_by_category( - context.user_id, sys_rate_limit_num, sys_rate_limit_min + + elif skillConfig.api_key_provider == "platform": + system_api_key = self.skill_store.get_system_config("venice_api_key") + if system_api_key: + logger.debug( + f"Using system Venice API key for skill {self.name} in category {self.category}" + ) + return system_api_key + raise ToolException( + f"No platform-hosted Venice API key found for skill '{self.name}' in category '{self.category}'." ) + else: - # Fallback if system limits aren't configured at all - logger.warning( - f"System rate limits for Venice AI not configured. 
Applying default 10 requests/day for user {context.user_id} on skill {self.name}" + raise ToolException( + f"Invalid API key provider '{skillConfig.api_key_provider}' for skill '{self.name}'" ) - await self.user_rate_limit_by_category(context.user_id, 10, 1440) - # Use provided negative prompt or the default from config - final_negative_prompt = ( - negative_prompt if negative_prompt is not None else default_negative_prompt - ) + except Exception as e: + raise ToolException(f"Failed to retrieve Venice API key: {str(e)}") from e - # --- Prepare API Request --- - # Default steps vary per model, we use a reasonable default here. - # Could be made configurable per-model if needed via class attributes. - default_steps = 30 - # Get model-specific defaults if available (example, not implemented from descriptions) - # steps = getattr(self, 'default_steps', 30) - # cfg_scale = getattr(self, 'default_cfg', 7.0) - - payload = { - "model": self.model_id, # Use the model_id from the subclass - "prompt": prompt, - "width": width, - "height": height, - "steps": default_steps, # Use the determined steps - "safe_mode": safe_mode, - "hide_watermark": hide_watermark, - "cfg_scale": 7.0, # Use the determined cfg_scale - "style_preset": style_preset, - "negative_prompt": final_negative_prompt, - "return_binary": True, - } - # Clean payload: remove keys with None values as API might not like them - payload = {k: v for k, v in payload.items() if v is not None} - - logger.debug(f"Venice API ({self.model_id}) payload: {payload}") - - headers = { - "Authorization": f"Bearer {api_key}", - "Content-Type": "application/json", - "Accept": "image/*, application/json", # Accept images or JSON errors - } - api_url = f"{base_url}/api/v1/image/generate" - - # --- Execute API Call and Handle Response --- + async def apply_venice_rate_limit(self, context: SkillContext) -> None: + """ + Applies rate limiting to prevent exceeding the Venice AI API's rate limits. + + Rate limits are applied based on the api_key_provider setting: + - 'agent_owner': uses agent-specific configuration. + - 'platform': uses system-wide configuration. + """ try: - async with httpx.AsyncClient( - timeout=180.0 - ) as client: # Set timeout on client - response = await client.post(api_url, json=payload, headers=headers) - logger.debug( - f"Venice API ({self.model_id}) status code: {response.status_code}, Headers: {response.headers}" + user_id = context.user_id + skillConfig = self.getSkillConfig(context=context) + + if skillConfig.api_key_provider == "agent_owner": + limit_num = skillConfig.rate_limit_number + limit_min = skillConfig.rate_limit_minutes + + if limit_num and limit_min: + logger.debug( + f"Applying Agent rate limit ({limit_num}/{limit_min} min) for user {user_id} on {self.name}" + ) + await self.user_rate_limit_by_category( + user_id, limit_num, limit_min + ) + + elif skillConfig.api_key_provider == "platform": + system_limit_num = self.skill_store.get_system_config( + f"{self.category}_rate_limit_number" + ) + system_limit_min = self.skill_store.get_system_config( + f"{self.category}_rate_limit_minutes" ) - content_type = str(response.headers.get("content-type", "")).lower() - - # Success: Image received - if response.status_code == 200 and content_type.startswith("image/"): - image_bytes = response.content - # Use prompt and model in hash for better uniqueness if needed, but content hash is usually sufficient - image_hash = hashlib.sha256(image_bytes).hexdigest() - file_extension = content_type.split("/")[-1] # e.g. 
png, jpeg - # Sanitize extension if needed - if "+" in file_extension: - file_extension = file_extension.split("+")[0] - if not file_extension: - file_extension = "png" # Default extension - - key = f"venice/{self.model_id}/{image_hash}.{file_extension}" # e.g., venice/flux-dev/a1b2c3d4e5f6a7b8.png - - # Store the image bytes - stored_url = await store_image_bytes( - image_bytes, key, content_type=content_type + if system_limit_num and system_limit_min: + logger.debug( + f"Applying System rate limit ({system_limit_num}/{system_limit_min} min) for user {user_id} on {self.name}" ) - logger.info( - f"Venice ({self.model_id}) image generated and stored: {stored_url}" + await self.user_rate_limit_by_category( + user_id, system_limit_num, system_limit_min ) - return stored_url - - # Error: Handle non-200 or non-image responses - else: - error_message = f"Venice API ({self.model_id}) error:" - try: - # Attempt to parse JSON error response - error_data = response.json() - error_message += f" Status {response.status_code} - {error_data.get('message', response.text)}" - logger.error(f"{error_message} | Response: {error_data}") - except Exception as json_err: - # Fallback if response is not JSON - error_message += ( - f" Status {response.status_code} - {response.text}" - ) - logger.error( - f"{error_message} | Failed to parse JSON response: {json_err}" - ) - - # Raise a more informative error based on status if possible - if response.status_code == 400: - raise ValueError( - f"Bad request to Venice API ({self.model_id}). Check parameters. API response: {response.text}" - ) - elif response.status_code == 401: - raise PermissionError( - f"Authentication failed for Venice API ({self.model_id}). Check API key." - ) - elif response.status_code == 429: - raise ConnectionAbortedError( - f"Rate limit exceeded for Venice API ({self.model_id}). Try again later." - ) - else: - response.raise_for_status() # Raise HTTPStatusError for other non-2xx codes - - except httpx.HTTPStatusError as e: - # Logged above, re-raise a potentially more user-friendly exception - raise Exception( - f"Venice API error ({self.model_id}): Status {e.response.status_code} - {e.response.text}" - ) from e - except httpx.TimeoutException as e: - logger.error(f"Venice API ({self.model_id}) request timed out: {e}") - raise TimeoutError( - f"The request to Venice AI ({self.model_id}) timed out after 180 seconds." - ) from e - except httpx.RequestError as e: - logger.error(f"Venice API ({self.model_id}) request error: {e}") - raise ConnectionError( - f"Could not connect to Venice API ({self.model_id}): {str(e)}" - ) from e + # do nothing if no rate limit is + return None + except Exception as e: - logger.error( - f"Error generating image with Venice AI ({self.model_id}): {e}", - exc_info=True, - ) - # Avoid leaking internal details unless necessary - raise Exception( - f"An unexpected error occurred while generating the image using model {self.model_id}." - ) from e + raise ToolException(f"Failed to apply Venice rate limit: {str(e)}") from e + + async def post( + self, path: str, payload: Dict[str, Any], context: SkillContext + ) -> Tuple[Dict[str, Any], Optional[Dict[str, Any]]]: + """ + Makes a POST request to the Venice AI API using the `make_venice_api_request` + function from the `skills.venice_image.api` module. + + This method handles the following: + + 1. Retrieving the API key using `get_api_key`. + 2. Constructing the request payload. + 3. Calling `make_venice_api_request` to make the actual API call. + 4. 
Returning the results from `make_venice_api_request`. + + Args: + path: The API endpoint path (e.g., "/api/v1/image/generate"). + payload: The request payload as a dictionary. + context: The SkillContext for accessing API keys and configs. + + Returns: + A tuple: (success_data, error_data). + - If successful, success contains the JSON response from the API. + - If an error occurs, success is an empty dictionary, and error contains error details. + """ + api_key = self.get_api_key(context) + + return await make_venice_api_request( + api_key, path, payload, self.category, self.name + ) diff --git a/skills/venice_image/config.py b/skills/venice_image/config.py new file mode 100644 index 00000000..8bbe6ef1 --- /dev/null +++ b/skills/venice_image/config.py @@ -0,0 +1,35 @@ +from typing import Optional + +from pydantic import BaseModel, Field + + +class VeniceImageConfig(BaseModel): + """Skill Config for Venice Image.""" + + api_key_provider: str = Field( + default="agent_owner", + description="Provider of the API Key, could be agent_owner or platform", + ) + safe_mode: bool = Field( + default=True, + description="Whether to use safe mode. If enabled, this will blur images that are classified as having adult content", + ) + hide_watermark: bool = Field( + default=True, + description="Whether to hide the Venice watermark. Venice may ignore this parameter for certain generated content.", + ) + embed_exif_metadata: bool = Field( + default=False, description="Whether to embed EXIF metadata in the image." + ) + negative_prompt: str = Field( + default="(worst quality: 1.4), bad quality, nsfw", + description="The default negative prompt used when no other prompt is provided.", + ) + rate_limit_number: Optional[int] = Field( + default=None, + description="Maximum number of allowed calls within the specified time window.", + ) + rate_limit_minutes: Optional[int] = Field( + default=None, + description="Duration of the time window (in minutes) for rate limiting.", + ) diff --git a/skills/venice_image/image_enhance/README.md b/skills/venice_image/image_enhance/README.md new file mode 100644 index 00000000..f517836b --- /dev/null +++ b/skills/venice_image/image_enhance/README.md @@ -0,0 +1,119 @@ +# image_enhance + +**Image Enhance** enables you to improve, stylize, or refine an existing image using the Venice AI enhancer. Unlike upscaling, this tool keeps the original image size but substantially upgrades its visual quality, style, or texture—ideal for creative, restoration, or polishing use-cases. + +--- + +## What does it do? + +- Accepts a publicly accessible image URL. +- Uses a provided prompt to guide the desired enhancement—e.g., style, artistic direction, or quality upgrades (such as “gold accents”, “vivid color”, “oil painting”, or “gentle watercolor”). +- Supports adjustment of the intensity of enhancement and how much original detail is preserved (creativity & replication). +- Returns a new image (matching original dimensions) with enhanced appearance and style. + +Typical uses: +- Sharpen and clarify blurry images. +- Instantly “re-theme” a photo or artwork (color, material, style transfer). +- Polish images for social, ecommerce, professional, or creative projects. 
+ +--- + +## Input Parameters + +| Field | Type | Description | Required | Default | +|-------------------|---------------|------------------------------------------------------------------------------------------------------------------|----------|---------| +| image_url | str (HttpUrl) | Publicly accessible URL of the image to enhance | Yes | | +| enhancePrompt | str | **Describes the desired enhancement, style, or theme.** Concise, descriptive terms work best. | Yes | | +| replication | float | How much of the original image structure, lines, and noise are retained (0.1–1.0). | No | 0.35 | +| enhanceCreativity | float | How far the AI can diverge from the original (0 = subtle, 1 = max stylization/new image). | No | 0.5 | + +**Prompt Examples (for `enhancePrompt`):** +- `"marble, gold veins, high contrast"` +- `"vaporwave color palette, cyberpunk lighting"` +- `"oil painting, impasto brushwork"` +- `"smooth skin, brighten shadows, cinematic look"` + +Example input: +```json +{ + "image_url": "https://img.site/old-photo.png", + "enhancePrompt": "soft watercolor, pastel tones, gentle light", + "replication": 0.25, + "enhanceCreativity": 0.7 +} +``` + +--- + +## Output + +On success, returns: +```json +{ + "success": true, + "result": "https://s3.storage.example/venice_image/image_enhance/ab12cd...png" +} +``` + +On error: +```json +{ + "success": false, + "error": "Failed to fetch or validate image from URL: ...", + "result": null +} +``` + +--- + +## Typical Use Cases + +- **Commerce/Product Images**: Instantly polish web photos for catalogs or listings. +- **Restoration**: Revive faded or dated artwork/photos for social, framing, or print. +- **Style Transfer**: Make a photo look like “stained glass”, “anime cel”, or “movie still”. +- **Social & Art Creation**: Quickly freshen up images for sharing with a unique twist. + +--- + +## Advanced Notes + +- **Replication**: + - Lower (`~0.1`): AI smooths out noise/details, crisper/cleaner look. + - Higher (`~0.9`): Retain original grit, preserve realistic features, more subtle change. +- **EnhanceCreativity**: + - Lower (`0.0`): Only very minor tweaks. + - Higher (`1.0`): Might look like a fully new artwork in the target style. +- **Image must be accessible and in a supported format**; conversion to PNG is automatic if needed. +- **Original resolution is kept**; for larger output, use `image_upscale` after enhancement. + +--- + +## Limitations + +- Does not increase resolution—use in conjunction with upscaling for large deliverables. +- Not a restoration-of-lost-content tool: Real degradation or loss isn’t recoverable, though apparent fidelity can be improved. +- The style quality depends on the provided enhancement prompt and the source image clarity. + +--- + +## Example Usage (Python-esque pseudocode) + +```python +result = await agent.send_tool( + "image_enhance", + { + "image_url": "https://cdn.site/photo.jpg", + "enhancePrompt": "marble, gold details, glowing edges", + "enhanceCreativity": 0.9 + } +) +enhanced_url = result["result"] +``` + +--- + +## Attribution & Compliance + +Use of this tool is subject to [Venice AI terms of service](https://venice.ai/) and applicable copyright law for input images. 
+ +--- \ No newline at end of file diff --git a/skills/venice_image/image_enhance/image_enhance.py b/skills/venice_image/image_enhance/image_enhance.py new file mode 100644 index 00000000..47e614b6 --- /dev/null +++ b/skills/venice_image/image_enhance/image_enhance.py @@ -0,0 +1,80 @@ +import logging +from typing import Optional + +from langchain_core.runnables import RunnableConfig +from pydantic import HttpUrl + +from skills.base import ToolException +from skills.venice_image.image_enhance.image_enhance_base import ( + VeniceImageEnhanceBaseTool, +) +from skills.venice_image.utils import fetch_image_as_base64 + +logger = logging.getLogger(__name__) + + +class ImageEnhance(VeniceImageEnhanceBaseTool): + """ + Enhances an existing image provided via URL using the Venice AI enhancer (not upscaling). + Useful for improving visual quality, adding style, or refining image features. + """ + + name: str = "venice_image_enhance" + description: str = ( + "Enhances an existing image from a URL using Venice AI.\n" + "Provide the public URL of the image to enhance.\n" + "Specify enhancement creativity level and a required prompt for style.\n" + "Returns the URL of the enhanced image." + ) + + async def _arun( + self, + image_url: HttpUrl, + enhancePrompt: str, + replication: Optional[float] = 0.35, + enhanceCreativity: Optional[float] = 0.5, + config: RunnableConfig = None, + **kwargs, + ) -> dict: + """ + Applies AI enhancement to an image without changing its size. + """ + + try: + context = self.context_from_config(config) + + await self.apply_venice_rate_limit(context) + + image_base64 = await fetch_image_as_base64(image_url) + if not image_base64: + error_msg = f"Failed to fetch or validate image from URL: {image_url}" + logger.error(error_msg) + raise ToolException( + str({"success": False, "error": error_msg, "result": None}) + ) + + payload = { + "image": image_base64, + "scale": 1, + "enhance": True, + "replication": replication, + "enhanceCreativity": enhanceCreativity, + "enhancePrompt": enhancePrompt, + } + + result, error = await self.post("api/v1/image/upscale", payload, context) + if error: + raise ToolException(f"Venice Image Enhance API error: {error}") + return result + except ToolException as e: + raise e + except Exception as e: + logger.error(f"Error in {self.name}: {str(e)}") + raise ToolException( + str( + { + "success": False, + "error": f"An unexpected error occurred: {str(e)}", + } + ) + ) diff --git a/skills/venice_image/image_enhance/image_enhance_base.py b/skills/venice_image/image_enhance/image_enhance_base.py new file mode 100644 index 00000000..7529aec0 --- /dev/null +++ b/skills/venice_image/image_enhance/image_enhance_base.py @@ -0,0 +1,23 @@ +from typing import Type + +from pydantic import BaseModel, Field + +# Import the generic base and shared input +from skills.venice_image.base import VeniceImageBaseTool +from skills.venice_image.image_enhance.image_enhance_input import ( + VeniceImageEnhanceInput, +) + + +class VeniceImageEnhanceBaseTool(VeniceImageBaseTool): + """ + Base class for Venice AI *Image Enchanching* tools. + Inherits from VeniceAIBaseTool and handles specifics of the + /image/upscale endpoint + """ + + args_schema: Type[BaseModel] = VeniceImageEnhanceInput + name: str = Field(description="The unique name of the image Enchanching tool.") + description: str = Field( + description="A description of what the image Enchanching tool does." 
+ ) diff --git a/skills/venice_image/image_enhance/image_enhance_input.py b/skills/venice_image/image_enhance/image_enhance_input.py new file mode 100644 index 00000000..6a11bc0c --- /dev/null +++ b/skills/venice_image/image_enhance/image_enhance_input.py @@ -0,0 +1,40 @@ +from typing import Optional + +from pydantic import BaseModel, Field + + +class VeniceImageEnhanceInput(BaseModel): + """Input for Venice Image Enhance tool (scale=1, enhance=True).""" + + image_url: str = Field( + description="The URL of the image to enhance. Must be a publicly accessible URL." + ) + + enhancePrompt: str = Field( + ..., + max_length=1500, + description=( + "Required prompt describing the desired enhancement style. " + "Best used with short descriptors like 'gold', 'marble', or 'angry, menacing'." + ), + ) + + replication: Optional[float] = Field( + default=0.35, + ge=0.1, + le=1.0, + description=( + "How strongly lines and noise in the base image are preserved. " + "Higher values retain more noise and detail but are less smooth." + ), + ) + + enhanceCreativity: float = Field( + default=0.5, + ge=0.0, + le=1.0, + description=( + "How much the enhancement AI is allowed to change the image. " + "0 = minimal change, 1 = generate a new image entirely." + ), + ) diff --git a/skills/venice_image/image_generation/README.md b/skills/venice_image/image_generation/README.md new file mode 100644 index 00000000..c00dd3fc --- /dev/null +++ b/skills/venice_image/image_generation/README.md @@ -0,0 +1,144 @@ +# Venice Image Generation Tools + +Venice Image Generation provides flexible, prompt-based image creation using multiple state-of-the-art AI models via the Venice AI API. These tools support a broad spectrum of styles, subject matter, and output formats, making it ideal for artists, designers, marketers, research, and personal creativity. + +--- + +## Overview + +- **Purpose:** Synthesize original images from natural-language prompts. +- **Supported Models:** Choose from several world-class models, each suited for different tasks: + - **Fluently XL:** Professional realism, lighting, artistic work. + - **Flux Dev:** Artistic research, innovative and creative workflows. + - **Flux Dev Uncensored:** For unrestricted, uncensored generation (including NSFW). + - **Lustify SDXL:** Photorealistic, NSFW/SFW versatility. + - **Pony Realism:** High-detail anime/character design (great with Danbooru tags). + - **Venice SD35/Stable Diffusion 3.5:** Artistic, illustrative, or design content, powered by Stability AI. +- **Unified Interface:** Each model is its own sub-tool, but all support the same core set of options. + +--- + +## Input Parameters + +| Field | Type | Description | Required | Default | +|------------------|---------------------------------|--------------------------------------------------------------------------------------------------------|----------|---------------| +| prompt | string | Main description of the image to generate. Use detailed, specific language for best results. | Yes | | +| model_id | string (see below) | AI model to use. Each sub-tool sets its own model_id internally. | N/A | (hardcoded) | +| width | int (max 2048) | Output image width (pixels). Must be multiple of 8 or 16 depending on model. | No | 1024 | +| height | int (max 2048) | Output image height (pixels). | No | 1024 | +| format | "png" \| "jpeg" \| "webp" | Output image format. | No | png | +| style_preset | string (enumerated) | Optional visual preset (e.g., "Photographic", "Anime", "Abstract", etc.). See full list below. 
| No | Photographic | +| negative_prompt | string | Exclude these elements or concepts from the image (e.g. “nsfw, low quality”). | No | suite default | +| seed | int | Control randomness. Reuse a value for repeatability. | No | random | +| cfg_scale | float (e.g. 1–20) | Prompt fidelity – higher = closer adherence to prompt, lower = more variety. | No | 7.5 | +| return_binary | bool | Always `False`. Output is a hosted URL, not inline binary. | N/A | False | +| safe_mode | bool | If enabled, applies content filtering / blurring for NSFW. | Inherited | true | +| embed_exif_metadata | bool | If enabled, embeds prompt info in output EXIF metadata. | Inherited | false | +| hide_watermark | bool | Hide the Venice watermark, where possible. | Inherited | true | + +#### Example Prompt: +> "In the style of a Renaissance oil painting, a fierce orange tabby cat with a crown, surrounded by lush velvet drapery and golden sunlight." + +#### Style Presets +An extensive list is included, for example: +- "Photographic" +- "Anime" +- "Cinematic" +- "Digital Art" +- "Abstract" +- "Cubist" +- ...and over 30 more. See documentation or schema for the full list. + +#### Example Input: +```json +{ + "prompt": "A highly detailed portrait of a robot playing chess, cinematic lighting, photoreal 4k", + "width": 1536, + "height": 1024, + "format": "jpeg", + "style_preset": "Cinematic", + "cfg_scale": 10, + "negative_prompt": "text, watermark, blurry", + "seed": 424242 +} +``` + +--- + +## Output + +The tool returns a dict that includes: + +- `success`: true/false +- `image_url`: The URL to the generated image (stored in S3 or similar object storage) +- Additional metadata (generation params, seed, etc.) + +Example: +```json +{ + "success": true, + "image_url": "https://s3.my-storage.net/venice_image/fluently-xl/abc123f....png", + "seed": 424242, + "generation_time_s": 22.4 +} +``` + +On error: +```json +{ + "success": false, + "error": "API returned error: prompt too long", + "result": null +} +``` + +--- + +## Advanced Capabilities + +- **Inpainting**: Modify regions of an existing image with precise mask and text controls (see schema for input structure). +- **EXIF Embedding**: If enabled, the tool can embed the prompt/config info in the output file’s EXIF metadata. + +--- + +## Use Cases + +- **Art & Design:** Instantly create drafts, mood boards, or finished art for any assignment +- **Marketing/Content:** Rapid visual ideation for blog posts, social media, ads, covers, etc. +- **Ideation/Research:** Visualize concepts, inventions, or speculative scenarios quickly +- **Education:** Generate visual teaching content on demand +- **Character/Concept Design:** Leverage anime/artistic models for avatars, OC creation, comics + +--- + +## Limitations + +- Results are only as good as your prompt and model choice. +- NSFW filtering varies by model; check the tool’s description and enable `safe_mode` for safety. +- Some style/subject combinations may not be supported by a given model. +- Stable Diffusion/Flux Dev variants may have license restrictions—review Venice API and model TOS. 
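---

## Under the Hood

Each generation sub-tool ultimately POSTs a JSON payload to `/api/v1/image/generate`, with the `model` field fixed by the sub-tool (e.g. `fluently-xl`). The sketch below mirrors the payload assembled in `image_generation_base.py`; the prompt and numeric values are illustrative defaults, and fields left unset (such as `seed`) are stripped before sending:

```json
{
  "model": "fluently-xl",
  "prompt": "A futuristic cityscape at sunset, neon lights, cinematic",
  "width": 1024,
  "height": 1024,
  "steps": 30,
  "format": "png",
  "cfg_scale": 7.5,
  "style_preset": "Photographic",
  "negative_prompt": "(worst quality: 1.4), bad quality, nsfw",
  "safe_mode": true,
  "hide_watermark": true,
  "embed_exif_metadata": false,
  "return_binary": false
}
```

The API responds with base64 image data in `images`, which the tool decodes, hashes, and stores in S3-compatible storage before returning the `image_url`.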
+ +--- + +## Example Usage (Pseudo-code) + +```python +result = await agent.send_tool( + "image_generation_fluently_xl", + { + "prompt": "A futuristic cityscape at sunset, neon lights, flying cars, cinematic", + "style_preset": "Cinematic", + "width": 1280, + "height": 704 + } +) +url = result["image_url"] +``` + +--- + +## Compliance & Attribution + +You must respect [Venice AI terms of service](https://venice.ai/) and the terms and licenses of the selected model. + +--- diff --git a/skills/venice_image/image_generation/image_generation_base.py b/skills/venice_image/image_generation/image_generation_base.py new file mode 100644 index 00000000..9ab920d9 --- /dev/null +++ b/skills/venice_image/image_generation/image_generation_base.py @@ -0,0 +1,117 @@ +import base64 +import hashlib +import logging +from typing import Any, Dict, Literal, Optional, Type + +from langchain_core.runnables import RunnableConfig +from pydantic import BaseModel, Field + +# Import the generic base +from skills.base import ToolException +from skills.venice_image.base import VeniceImageBaseTool +from skills.venice_image.image_generation.image_generation_input import ( + VeniceImageGenerationInput, +) +from utils.s3 import store_image_bytes + +logger = logging.getLogger(__name__) + + +class VeniceImageGenerationBaseTool(VeniceImageBaseTool): + """ + Base class for Venice AI *Image Generation* tools. + Inherits from VeniceAIBaseTool and handles specifics of the + /image/generate endpoint. + """ + + # --- Attributes specific to Image Generation --- + args_schema: Type[BaseModel] = VeniceImageGenerationInput + + # --- Attributes Subclasses MUST Define --- + name: str = Field(description="The unique name of the image generation tool/model.") + description: str = Field( + description="A description of what the image generation tool/model does." + ) + model_id: str = Field( + description="The specific model ID used in the Venice Image API call." 
+ ) + + async def _arun( + self, + prompt: str, + seed: Optional[int] = None, + negative_prompt: Optional[str] = None, + width: Optional[int] = 1024, + height: Optional[int] = 1024, + format: Literal["png", "jpeg", "webp"] = "png", + cfg_scale: Optional[float] = 7.5, + style_preset: Optional[str] = "Photographic", + config: RunnableConfig = None, + **kwargs, + ) -> Dict[str, Any]: + try: + context = self.context_from_config(config) + skillConfig = self.getSkillConfig(context) + await self.apply_venice_rate_limit(context) + + final_negative_prompt = negative_prompt or skillConfig.negative_prompt + + payload = { + "model": self.model_id, + "prompt": prompt, + "width": width, + "height": height, + "seed": seed, + "format": format, + "steps": 30, + "safe_mode": skillConfig.safe_mode, + "hide_watermark": skillConfig.hide_watermark, + "embed_exif_metadata": skillConfig.embed_exif_metadata, + "cfg_scale": cfg_scale or 7.0, + "style_preset": style_preset, + "negative_prompt": final_negative_prompt, + "return_binary": False, + } + + # Strip out None values + payload = {k: v for k, v in payload.items() if v is not None} + + result, error = await self.post("/api/v1/image/generate", payload, context) + + if error: + raise ToolException(f"Venice Image Generation API error: {error}") + + base64_image_string = result.get("images", [None])[0] + if not base64_image_string: + raise ToolException("No image data found in Venice Image API response.") + + try: + image_bytes = base64.b64decode(base64_image_string) + except Exception as decode_error: + raise ToolException("Invalid base64 image data.") from decode_error + + response_format = ( + result.get("request", {}).get("data", {}).get("format", format) + ) + file_extension = response_format or format + content_type = f"image/{file_extension}" + + image_hash = hashlib.sha256(image_bytes).hexdigest() + key = f"{self.category}/{self.model_id}/{image_hash}.{file_extension}" + + stored_url = await store_image_bytes( + image_bytes, key, content_type=content_type + ) + + # Cleanup & enrich the response + result.pop("images", None) + result["image_url"] = stored_url + result["image_bytes_sha256"] = image_hash + + return result + except ToolException as e: + raise e + except Exception as e: + raise ToolException( + "An unexpected error occurred during the image generation process." + ) from e diff --git a/skills/venice_image/image_generation_fluently_xl.py b/skills/venice_image/image_generation/image_generation_fluently_xl.py similarity index 67% rename from skills/venice_image/image_generation_fluently_xl.py rename to skills/venice_image/image_generation/image_generation_fluently_xl.py index 77a8e022..5e95d479 100644 --- a/skills/venice_image/image_generation_fluently_xl.py +++ b/skills/venice_image/image_generation/image_generation_fluently_xl.py @@ -1,9 +1,10 @@ -# venice_image/image_generation_fluently_xl.py -from skills.venice_image.base import VeniceImageBaseTool -from skills.venice_image.input import STYLE_PRESETS # Keep for description +from skills.venice_image.image_generation.image_generation_base import ( + VeniceImageGenerationBaseTool, +) +from skills.venice_image.image_generation.image_generation_input import STYLE_PRESETS -class ImageGenerationFluentlyXL(VeniceImageBaseTool): +class ImageGenerationFluentlyXL(VeniceImageGenerationBaseTool): """ Tool for generating images using the Fluently-XL model via Venice AI. Known for aesthetics, lighting, realism, and correct anatomy. 
@@ -20,4 +21,4 @@ class ImageGenerationFluentlyXL(VeniceImageBaseTool): ) model_id: str = "fluently-xl" - # args_schema and _arun are inherited from VeniceImageBaseTool + # args_schema and _arun are inherited from VeniceImageGenerationBaseTool diff --git a/skills/venice_image/image_generation_flux_dev.py b/skills/venice_image/image_generation/image_generation_flux_dev.py similarity index 69% rename from skills/venice_image/image_generation_flux_dev.py rename to skills/venice_image/image_generation/image_generation_flux_dev.py index ff180a35..e1dafe24 100644 --- a/skills/venice_image/image_generation_flux_dev.py +++ b/skills/venice_image/image_generation/image_generation_flux_dev.py @@ -1,9 +1,10 @@ -# venice_image/image_generation_flux_dev.py -from skills.venice_image.base import VeniceImageBaseTool -from skills.venice_image.input import STYLE_PRESETS # Keep for description +from skills.venice_image.image_generation.image_generation_base import ( + VeniceImageGenerationBaseTool, +) +from skills.venice_image.image_generation.image_generation_input import STYLE_PRESETS -class ImageGenerationFluxDev(VeniceImageBaseTool): +class ImageGenerationFluxDev(VeniceImageGenerationBaseTool): """ Tool for generating images using Venice AI's Flux Dev model. Developed by Black Forest Labs, this is a 12 billion parameter rectified flow transformer. @@ -21,4 +22,4 @@ class ImageGenerationFluxDev(VeniceImageBaseTool): ) model_id: str = "flux-dev" - # args_schema and _arun are inherited from VeniceImageBaseTool + # args_schema and _arun are inherited from VeniceImageGenerationBaseTool diff --git a/skills/venice_image/image_generation_flux_dev_uncensored.py b/skills/venice_image/image_generation/image_generation_flux_dev_uncensored.py similarity index 67% rename from skills/venice_image/image_generation_flux_dev_uncensored.py rename to skills/venice_image/image_generation/image_generation_flux_dev_uncensored.py index 1ddecd1a..15f99e8b 100644 --- a/skills/venice_image/image_generation_flux_dev_uncensored.py +++ b/skills/venice_image/image_generation/image_generation_flux_dev_uncensored.py @@ -1,9 +1,10 @@ -# venice_image/image_generation_flux_dev_uncensored.py -from skills.venice_image.base import VeniceImageBaseTool -from skills.venice_image.input import STYLE_PRESETS # Keep for description +from skills.venice_image.image_generation.image_generation_base import ( + VeniceImageGenerationBaseTool, +) +from skills.venice_image.image_generation.image_generation_input import STYLE_PRESETS -class ImageGenerationFluxDevUncensored(VeniceImageBaseTool): +class ImageGenerationFluxDevUncensored(VeniceImageGenerationBaseTool): """ Tool for generating images using Venice AI's Flux Dev Uncensored model. An uncensored version of the flux-dev model for unrestricted generation. 
@@ -20,4 +21,4 @@ class ImageGenerationFluxDevUncensored(VeniceImageBaseTool): ) model_id: str = "flux-dev-uncensored" - # args_schema and _arun are inherited from VeniceImageBaseTool + # args_schema and _arun are inherited from VeniceImageGenerationBaseTool diff --git a/skills/venice_image/image_generation/image_generation_input.py b/skills/venice_image/image_generation/image_generation_input.py new file mode 100644 index 00000000..6b8f7433 --- /dev/null +++ b/skills/venice_image/image_generation/image_generation_input.py @@ -0,0 +1,158 @@ +from typing import Literal, Optional + +from pydantic import BaseModel, Field, HttpUrl + +STYLE_PRESETS = [ + "3D Model", + "Analog Film", + "Anime", + "Cinematic", + "Comic Book", + "Craft Clay", + "Digital Art", + "Enhance", + "Fantasy Art", + "Isometric Style", + "Line Art", + "Lowpoly", + "Neon Punk", + "Origami", + "Photographic", + "Pixel Art", + "Texture", + "Advertising", + "Food Photography", + "Real Estate", + "Abstract", + "Cubist", + "Graffiti", + "Hyperrealism", + "Impressionist", + "Pointillism", + "Pop Art", + "Psychedelic", + "Renaissance", + "Steampunk", + "Surrealist", + "Typography", + "Watercolor", + "Fighting Game", + "GTA", + "Super Mario", + "Minecraft", + "Pokemon", + "Retro Arcade", + "Retro Game", + "RPG Fantasy Game", + "Strategy Game", + "Street Fighter", + "Legend of Zelda", + "Architectural", + "Disco", + "Dreamscape", + "Dystopian", + "Fairy Tale", + "Gothic", + "Grunge", + "Horror", + "Minimalist", + "Monochrome", + "Nautical", + "Space", + "Stained Glass", + "Techwear Fashion", + "Tribal", + "Zentangle", + "Collage", + "Flat Papercut", + "Kirigami", + "Paper Mache", + "Paper Quilling", + "Papercut Collage", + "Papercut Shadow Box", + "Stacked Papercut", + "Thick Layered Papercut", + "Alien", + "Film Noir", + "HDR", + "Long Exposure", + "Neon Noir", + "Silhouette", + "Tilt-Shift", +] + +STYLE_PRESETS_DESCRIPTION = ( + "Optional style preset to apply. Available options: " + + ", ".join([f"'{s}'" for s in STYLE_PRESETS]) + + ". Defaults to 'Photographic'." +) + + +class InpaintMask(BaseModel): + image_prompt: str = Field( + ..., + description="A text prompt describing the original input image that an image model would use to produce a similar/identical image, including the changed features the user will be inpainting.", + ) + inferred_object: str = Field( + ..., description="The content being added via inpainting." + ) + object_target: str = Field( + ..., description="Element(s) in the original image to be inpainted over." + ) + + +class Inpaint(BaseModel): + image_url: HttpUrl = Field( + ..., + description="Image target to inpaint", + ) + strength: int = Field( + ..., ge=0, le=100, description="Strength of the inpainting (0-100).", example=50 + ) + mask: InpaintMask = Field(..., description="Mask settings for inpainting.") + + +class VeniceImageGenerationInput(BaseModel): + """Model representing input parameters for Venice Image Generation.""" + + prompt: str = Field( + description="The main text prompt describing what should be included in the generated image." + ) + seed: Optional[int] = Field( + default=None, + description="Random seed value to control image generation randomness. " + "Use the same seed to reproduce identical results. If not set, a random seed will be used.", + ) + negative_prompt: Optional[str] = Field( + default=None, + description="Text describing what should be excluded from the generated image. 
" + "If not provided, the default agent configuration will be used.", + ) + width: Optional[int] = Field( + default=1024, + le=2048, + description="Width of the generated image in pixels. Maximum allowed is 2048. Default is 1024.", + ) + height: Optional[int] = Field( + default=1024, + le=2048, + description="Height of the generated image in pixels. Maximum allowed is 2048. Default is 1024.", + ) + format: Literal["png", "jpeg", "webp"] = Field( + default="png", + description="Output image format. Options are 'png', 'jpeg', or 'webp'. Defaults to 'png'.", + ) + cfg_scale: Optional[float] = Field( + default=7.5, + description="Classifier-Free Guidance (CFG) scale controls how closely the image follows the prompt. " + "Higher values (1-20) result in more adherence. Default is 7.5.", + ) + style_preset: Optional[str] = Field( + default="Photographic", description=STYLE_PRESETS_DESCRIPTION + ) + inpainting: Optional[Inpaint] = Field( + default=None, + description="Optional inpainting operation that allows modification of specific objects within an image. " + "Requires an original image url, a strength value (0-100), and detailed mask instructions " + "to define which part of the image should be edited and what should replace it.", + ) diff --git a/skills/venice_image/image_generation_lustify_sdxl.py b/skills/venice_image/image_generation/image_generation_lustify_sdxl.py similarity index 68% rename from skills/venice_image/image_generation_lustify_sdxl.py rename to skills/venice_image/image_generation/image_generation_lustify_sdxl.py index c2f5cd3e..9f03c46d 100644 --- a/skills/venice_image/image_generation_lustify_sdxl.py +++ b/skills/venice_image/image_generation/image_generation_lustify_sdxl.py @@ -1,9 +1,10 @@ -# venice_image/image_generation_lustify_sdxl.py -from skills.venice_image.base import VeniceImageBaseTool -from skills.venice_image.input import STYLE_PRESETS # Keep for description +from skills.venice_image.image_generation.image_generation_base import ( + VeniceImageGenerationBaseTool, +) +from skills.venice_image.image_generation.image_generation_input import STYLE_PRESETS -class ImageGenerationLustifySDXL(VeniceImageBaseTool): +class ImageGenerationLustifySDXL(VeniceImageGenerationBaseTool): """ Tool for generating images using the Lustify SDXL model via Venice AI. A photorealistic SDXL checkpoint primarily focused on NSFW content, but can do SFW. 
@@ -20,4 +21,4 @@ class ImageGenerationLustifySDXL(VeniceImageBaseTool): ) model_id: str = "lustify-sdxl" - # args_schema and _arun are inherited from VeniceImageBaseTool + # args_schema and _arun are inherited from VeniceImageGenerationBaseTool diff --git a/skills/venice_image/image_generation_pony_realism.py b/skills/venice_image/image_generation/image_generation_pony_realism.py similarity index 69% rename from skills/venice_image/image_generation_pony_realism.py rename to skills/venice_image/image_generation/image_generation_pony_realism.py index 06eb237a..ed99b73a 100644 --- a/skills/venice_image/image_generation_pony_realism.py +++ b/skills/venice_image/image_generation/image_generation_pony_realism.py @@ -1,9 +1,10 @@ -# venice_image/image_generation_pony_realism.py -from skills.venice_image.base import VeniceImageBaseTool -from skills.venice_image.input import STYLE_PRESETS # Keep for description +from skills.venice_image.image_generation.image_generation_base import ( + VeniceImageGenerationBaseTool, +) +from skills.venice_image.image_generation.image_generation_input import STYLE_PRESETS -class ImageGenerationPonyRealism(VeniceImageBaseTool): +class ImageGenerationPonyRealism(VeniceImageGenerationBaseTool): """ Tool for generating images using the Pony Realism model via Venice AI. Focused on high-detail, realistic images, especially anime/character designs. Uses Danbooru tags. @@ -20,4 +21,4 @@ class ImageGenerationPonyRealism(VeniceImageBaseTool): ) model_id: str = "pony-realism" - # args_schema and _arun are inherited from VeniceImageBaseTool + # args_schema and _arun are inherited from VeniceImageGenerationBaseTool diff --git a/skills/venice_image/image_generation_stable_diffusion_3_5.py b/skills/venice_image/image_generation/image_generation_stable_diffusion_3_5.py similarity index 72% rename from skills/venice_image/image_generation_stable_diffusion_3_5.py rename to skills/venice_image/image_generation/image_generation_stable_diffusion_3_5.py index 77aa2035..cdf1b4d9 100644 --- a/skills/venice_image/image_generation_stable_diffusion_3_5.py +++ b/skills/venice_image/image_generation/image_generation_stable_diffusion_3_5.py @@ -1,9 +1,10 @@ -# venice_image/image_generation_stable_diffusion_3_5.py -from skills.venice_image.base import VeniceImageBaseTool -from skills.venice_image.input import STYLE_PRESETS # Keep for description +from skills.venice_image.image_generation.image_generation_base import ( + VeniceImageGenerationBaseTool, +) +from skills.venice_image.image_generation.image_generation_input import STYLE_PRESETS -class ImageGenerationStableDiffusion35(VeniceImageBaseTool): +class ImageGenerationStableDiffusion35(VeniceImageGenerationBaseTool): """ Tool for generating images using Venice AI's interface to Stable Diffusion 3.5 Large (alternative ID). Developed by Stability AI, using MMDiT architecture. Good for art and design. 
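The same input module also introduces the optional `Inpaint`/`InpaintMask` block for targeted edits to an existing image. A hedged sketch of an inpainting request follows; the URL and prompt text are invented for illustration, and the actual API behavior is handled by the shared generation base tool:

```python
# Hypothetical request: the URL and prompts are made up; Inpaint and InpaintMask
# are the models defined in image_generation_input.py above.
from skills.venice_image.image_generation.image_generation_input import (
    Inpaint,
    InpaintMask,
    VeniceImageGenerationInput,
)

request = VeniceImageGenerationInput(
    prompt="A red vintage bicycle leaning against a brick wall",
    inpainting=Inpaint(
        image_url="https://example.com/original.png",
        strength=60,
        mask=InpaintMask(
            image_prompt="A blue vintage bicycle leaning against a brick wall",
            inferred_object="a red vintage bicycle",
            object_target="the blue bicycle",
        ),
    ),
)
```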
@@ -22,4 +23,4 @@ class ImageGenerationStableDiffusion35(VeniceImageBaseTool): # Use the specific ID provided by Venice model_id: str = "stable-diffusion-3.5" # Different model ID - # args_schema and _arun are inherited from VeniceImageBaseTool + # args_schema and _arun are inherited from VeniceImageGenerationBaseTool diff --git a/skills/venice_image/image_generation_venice_sd35.py b/skills/venice_image/image_generation/image_generation_venice_sd35.py similarity index 71% rename from skills/venice_image/image_generation_venice_sd35.py rename to skills/venice_image/image_generation/image_generation_venice_sd35.py index d3022355..7ae62b4a 100644 --- a/skills/venice_image/image_generation_venice_sd35.py +++ b/skills/venice_image/image_generation/image_generation_venice_sd35.py @@ -1,9 +1,10 @@ -# venice_image/image_generation_venice_sd35.py -from skills.venice_image.base import VeniceImageBaseTool -from skills.venice_image.input import STYLE_PRESETS # Keep for description +from skills.venice_image.image_generation.image_generation_base import ( + VeniceImageGenerationBaseTool, +) +from skills.venice_image.image_generation.image_generation_input import STYLE_PRESETS -class ImageGenerationVeniceSD35(VeniceImageBaseTool): +class ImageGenerationVeniceSD35(VeniceImageGenerationBaseTool): """ Tool for generating images using Venice AI's interface to Stable Diffusion 3.5 Large. Developed by Stability AI, using MMDiT architecture. Good for art and design. @@ -22,4 +23,4 @@ class ImageGenerationVeniceSD35(VeniceImageBaseTool): # Use the specific ID provided by Venice, assuming it matches the name model_id: str = "venice-sd35" - # args_schema and _arun are inherited from VeniceImageBaseTool + # args_schema and _arun are inherited from VeniceImageGenerationBaseTool diff --git a/skills/venice_image/image_upscale/README.md b/skills/venice_image/image_upscale/README.md new file mode 100644 index 00000000..c1b622bc --- /dev/null +++ b/skills/venice_image/image_upscale/README.md @@ -0,0 +1,111 @@ +# image_upscale + +**Image Upscale** is a sub-tool of the Venice Image suite. It uses Venice AI’s powerful super-resolution models to increase the size and clarity of images by 2x or 4x, making low-resolution images suitable for HD displays, print, or content enhancement. This is not just simple pixel stretching—it uses AI to intelligently recreate additional detail, texture, and smoothness. + +--- + +## What does it do? + +Given any publicly accessible image URL, the tool fetches the image, applies deep-learning upscaling (super-resolution), and returns a new image URL to the upscaled output. Users can choose between 2x or 4x upscaling depending on needs, and can optionally control how much "realism"/texture is preserved from the original. + +Key benefits: +- Consistent color, sharpness, and clarity at higher resolutions +- AI removes pixelation and can reduce compression artifacts +- Optional "replication" factor lets you tune how much of the original’s noise/detail is restored + +--- + +## Input + +| Field | Type | Description | Required | Default | +|--------------|------------------|------------------------------------------------------------------------------------------------------|----------|---------| +| image_url | HttpUrl | Public URL to the image you want to upscale. | Yes | | +| scale | Literal[2, 4] | The scaling factor (2 for 2x, 4 for 4x enlargement). | Yes | 2 | +| replication | float (0.1–1.0) | How much to preserve edges, texture, and noise from original (higher = more detail, less smoothing). 
| No | 0.35 | + +Example: +```json +{ + "image_url": "https://example.com/photo.jpg", + "scale": 4, + "replication": 0.5 +} +``` + +--- + +## Output + +On success, returns a result dictionary containing at least: +- `success`: true +- `result`: URL for the upscaled image (typically hosted on S3 or compatible object storage) +- Additional metadata as needed + +Example: +```json +{ + "success": true, + "result": "https://s3.storage.example/venice_image/image_upscale/1a2b3c....png" +} +``` + +On error: +```json +{ + "success": false, + "error": "Failed to fetch or validate image from URL: ...", + "result": null +} +``` + +--- + +## Typical Use Cases + +- **Photo Restoration** – Upscale old, small web images for print or display +- **Content Creation** – Create HD assets from AI-generated or web-ripped images +- **Design/Prototyping** – Improve source assets for posters, presentations, or large canvas +- **Archival** – Enhance legacy digital art or research scans + +--- + +## Advanced Notes + +- Works for all common raster formats (JPG, PNG, WEBP). Unsupported types are auto-converted to PNG. +- "Replication" factor explanation: + - **Low values (e.g., 0.1–0.25):** smoother, less noise, more “plastic” look (good for AI/clean results) + - **High values (e.g., 0.7–1.0):** preserves original photo noise/texture, less smoothing (good for art/photo upscaling) +- Original aspect ratio is always preserved. + +--- + +## Limitations + +- Does not add content—only increases fidelity of existing features. +- Output detail is limited by source image quality and AI model limits. +- NSFW images will be blurred if safe mode is enabled. + +--- + +## Example Usage (Pseudo-code) + +```python +result = await agent.send_tool( + "image_upscale", + { + "image_url": "https://somehost.com/image.png", + "scale": 2, + "replication": 0.4 + } +) +upscaled_url = result["result"] +``` + +--- + +## Compliance & Attribution + +- You must have rights to use the supplied image. +- Follows [Venice AI terms of service](https://venice.ai/). + +--- \ No newline at end of file diff --git a/skills/venice_image/image_upscale/image_upscale.py b/skills/venice_image/image_upscale/image_upscale.py new file mode 100644 index 00000000..fa67af72 --- /dev/null +++ b/skills/venice_image/image_upscale/image_upscale.py @@ -0,0 +1,90 @@ +import logging +from typing import Literal, Optional + +from langchain_core.runnables import RunnableConfig +from pydantic import HttpUrl + +from skills.base import ToolException +from skills.venice_image.image_upscale.image_upscale_base import ( + VeniceImageUpscaleBaseTool, +) +from skills.venice_image.utils import fetch_image_as_base64 + +logger = logging.getLogger(__name__) + + +class ImageUpscale(VeniceImageUpscaleBaseTool): + """ + Upscales an existing image provided via URL by a factor of 2 or 4 using the Venice AI API. + Ideal for enhancing the resolution of previously generated or existing images. + """ + + # --- Tool Specific Configuration --- + name: str = "venice_image_upscale" + description: str = ( + "Upscales an existing image from a URL using Venice AI.\n" + "Provide the public URL of the image to upscale.\n" + "Specify the desired scale factor: 2 (for 2x upscale) or 4 (for 4x upscale).\n" + "Returns the URL of the upscaled image." 
+ ) + + # No model_id needed for the generic upscale endpoint currently + async def _arun( + self, + image_url: HttpUrl, + scale: Literal[2, 4], + replication: Optional[float] = 0.35, + config: RunnableConfig = None, + **kwargs, + ) -> dict: + """ + Asynchronously upscales an image from the provided URL using the Venice AI API. + + Args: + image_url (HttpUrl): The public URL of the image to upscale. + scale (Literal[2, 4]): The scale factor for upscaling (2x or 4x). + replication (Optional[float]): The replication factor for the upscale process, defaults to 0.35. + config (RunnableConfig, optional): Configuration for the runnable, if any. + **kwargs: Additional keyword arguments. + + Returns: + dict: The API response containing the URL of the upscaled image. + + Raises: + ToolException: If the image cannot be fetched, validated, or upscaled, or if an API error occurs. + """ + + try: + context = self.context_from_config(config) + + await self.apply_venice_rate_limit(context) + + image_base64 = await fetch_image_as_base64(image_url) + if not image_base64: + error_msg = f"Failed to fetch or validate image from URL: {image_url}" + logger.error(error_msg) + raise ToolException( + str({"success": False, "error": error_msg, "result": None}) + ) + + payload = { + "image": image_base64, + "scale": scale, + "replication": replication, + } + result, error = await self.post("api/v1/image/upscale", payload, context) + if error: + raise ToolException(f"Venice Image Upscale API error: {error}") + return result + except ToolException as e: + raise e + except Exception as e: + logger.error(f"Error in {self.name}: {str(e)}") + raise ToolException( + str( + { + "success": False, + "error": f"An unexpected error occurred: {str(e)}", + } + ) + ) diff --git a/skills/venice_image/image_upscale/image_upscale_base.py b/skills/venice_image/image_upscale/image_upscale_base.py new file mode 100644 index 00000000..452204c3 --- /dev/null +++ b/skills/venice_image/image_upscale/image_upscale_base.py @@ -0,0 +1,23 @@ +from typing import Type + +from pydantic import BaseModel, Field + +# Import the generic base and shared input +from skills.venice_image.base import VeniceImageBaseTool +from skills.venice_image.image_upscale.image_upscale_input import ( + VeniceImageUpscaleInput, +) + + +class VeniceImageUpscaleBaseTool(VeniceImageBaseTool): + """ + Base class for Venice AI *Image Upscaling* tools. + Inherits from VeniceAIBaseTool and handles specifics of the + /image/upscale endpoint + """ + + args_schema: Type[BaseModel] = VeniceImageUpscaleInput + name: str = Field(description="The unique name of the image upscaling tool.") + description: str = Field( + description="A description of what the image upscaling tool does." + ) diff --git a/skills/venice_image/image_upscale/image_upscale_input.py b/skills/venice_image/image_upscale/image_upscale_input.py new file mode 100644 index 00000000..4179a00e --- /dev/null +++ b/skills/venice_image/image_upscale/image_upscale_input.py @@ -0,0 +1,22 @@ +from typing import Literal, Optional + +from pydantic import BaseModel, Field, HttpUrl + + +class VeniceImageUpscaleInput(BaseModel): + """Input for the Image Upscale tool.""" + + image_url: HttpUrl = Field( + description="The URL of the image to upscale. Must be a publicly accessible URL.", + ) + replication: Optional[float] = Field( + default=0.35, + description=( + 'How strongly lines and noise in the base image are preserved. Higher values are noisier but less plastic/AI "generated"/hallucinated. Must be between 0.1 and 1.' 
+ "Required range: 0.1 <= x <= 1" + ), + ) + scale: Literal[2, 4] = Field( + default=2, + description="The factor by which to upscale the image (either 2 or 4). Defaults to 2.", + ) diff --git a/skills/venice_image/image_vision/README.md b/skills/venice_image/image_vision/README.md new file mode 100644 index 00000000..e6cbd546 --- /dev/null +++ b/skills/venice_image/image_vision/README.md @@ -0,0 +1,112 @@ +# image_vision + +**Image Vision** is a sub-tool in the Venice Image suite that provides highly detailed, comprehensive, AI-generated textual descriptions of images. It is designed for analyzing and summarizing the visual content of any image accessible via URL. + +--- + +## What does it do? + +This tool uses Venice AI’s latest visual-language model (`qwen-2.5-vl`) to “see” an image as a human or curator would. It returns a paragraph-length, multi-faceted, exhaustive description covering: + +- All visible objects and their properties (colors, shapes, count, arrangement) +- Scene composition: spatial arrangement, relationships, perspective +- Surface textures, materials, lighting, color palette +- Contextual, stylistic, or artistic features (e.g., “art deco style,” “digital illustration”) +- Mood, visual storytelling elements, or any notable anomalies +- Additional inferred details where possible + +This tool is ideal for accessibility, archiving, content discovery, search, and cognitive AI workflows. + +--- + +## Input + +| Field | Type | Description | Required | +|--------------|----------|---------------------------------------------------------------------|----------| +| image_url | HttpUrl | Publicly accessible URL to the target image. | Yes | + +Example: +```json +{ + "image_url": "https://example.com/some_picture.jpg" +} +``` + +--- + +## Example Output + +A typical result will be a dictionary with the generated description under a relevant key (the raw API response may vary based on Venice formats): + +```json +{ + "success": true, + "result": "A vibrant, high-resolution digital illustration depicting a Venetian canal at midday. The scene features pastel-hued buildings on either side of the canal with ornate balconies and open shuttered windows. Gondolas and small boats glide over the calm, reflective water, casting rippling shadows. The sky is clear and blue, with sunlight streaming across the facades, creating sharp contrasts and lively reflections. Crowds of tourists are visible on the far bank, while colorful banners and flowerpots accent the architecture. The composition is balanced, with attention to perspective and depth, and the general mood is lively and picturesque." +} +``` + +In case of errors (invalid URL, fetch issues, inappropriate filetype, etc.), a descriptive error message is returned: + +```json +{ + "success": false, + "error": "Failed to fetch or validate image from URL: https://example.com/broken.jpg", + "result": null +} +``` + +--- + +## Typical Use Cases + +- **Accessibility:** Generate alt-text for visually impaired users. +- **AI Agents:** Understand and react to visual content in workflow automations. +- **Search & Tagging:** Automatically caption and index photo libraries. +- **Content Moderation:** Pre-screen or context-check image uploads. +- **Educational Tools:** Explain or transcribe visual materials for students. + +--- + +## Advanced Notes + +- The tool only works with image URLs that are publicly accessible and in a common web format (JPG, PNG, etc). +- Image URLs are validated and, where necessary, format-normalized using Pillow. 
+- The system never stores or caches the image, but will download it temporarily for analysis. + +**Model Details** +Venice AI leverages licensed large vision-language models; this tool currently uses `qwen-2.5-vl`, known for dense, multi-aspect, human-like image explanations. + +--- + +## Configuration Options + +- No special options; inherits API key, safe mode, and base logging from the main suite configuration. + +--- + +## Limitations + +- May not detect “hidden” content, steganographic messages, or small details lost in low-res images. +- Will describe as best possible—if the image is blank, corrupted, or unrelated, a best-effort bland description may be given. +- Not a content moderator—use with your own safety checks if required. + +--- + +## Example Usage (Pseudo-code) +```python +result = await agent.send_tool( + "image_vision", + { + "image_url": "https://mycdn.com/image.jpg" + } +) +desc = result["result"] +``` + +--- + +## Attribution/Compliance + +All usage subject to [Venice AI terms of service](https://venice.ai/). Do not use for unlawful or privacy-invading data mining. + +--- diff --git a/skills/venice_image/image_vision/image_vision.py b/skills/venice_image/image_vision/image_vision.py new file mode 100644 index 00000000..fa28aa97 --- /dev/null +++ b/skills/venice_image/image_vision/image_vision.py @@ -0,0 +1,98 @@ +import logging +from typing import Any, Type + +from langchain_core.runnables import RunnableConfig +from pydantic import BaseModel, HttpUrl + +from skills.base import ToolException +from skills.venice_image.image_vision.image_vision_base import ( + VeniceImageVisionBaseTool, +) +from skills.venice_image.image_vision.image_vision_input import VeniceImageVision +from skills.venice_image.utils import fetch_image_as_base64 + +logger = logging.getLogger(__name__) + + +class ImageVision(VeniceImageVisionBaseTool): + """ + Describes an image provided via URL using the Venice AI API. + Ideal for understanding the content of an existing image. + """ + + name: str = "venice_image_vision" + description: str = ( + "Describes an image from a URL using Venice AI.\n" + "Provide the public URL of the image to describe.\n" + "Returns a descriptive text of the image." + ) + args_schema: Type[BaseModel] = VeniceImageVision + # No model_id needed for the generic vision endpoint currently + + async def _arun( + self, + image_url: HttpUrl, + config: RunnableConfig = None, + **kwargs, + ) -> dict[str, Any]: + try: + context = self.context_from_config(config) + + await self.apply_venice_rate_limit(context) + + image_base64 = await fetch_image_as_base64(image_url) + if not image_base64: + error_msg = f"Failed to fetch or validate image from URL: {image_url}" + logger.error(error_msg) + return {"success": False, "error": error_msg, "result": None} + + payload = { + "model": "qwen-2.5-vl", + "messages": [ + { + "role": "system", + "content": [ + { + "type": "text", + "text": ( + "You are an AI model that provides detailed descriptions of images. " + "When given an image, you must respond with a description that is as comprehensive and detailed as possible. " + "Focus on identifying all objects, colors, textures, and any other relevant features present in the image. " + "Provide a thorough and exhaustive account of what is visible in the image." + ), + } + ], + }, + { + "role": "user", + "content": [ + { + "type": "text", + "text": ( + "Provide an extremely detailed description of the image, focusing on every discernible aspect. 
" + "Include information about objects, colors, textures, lighting conditions, artistic style (if applicable), " + "composition, and any other relevant details that would allow someone to accurately understand and potentially " + "recreate the image. Be as thorough and comprehensive as possible." + ), + }, + {"type": "image_url", "image_url": {"url": str(image_url)}}, + ], + }, + ], + } + + result, error = await self.post("api/v1/chat/completions", payload, context) + if error: + raise ToolException(f"Venice Image Vision API error: {error}") + return result + except ToolException as e: + return { + "success": False, + "error": f"An unexpected error occurred: {str(e)}", + } + except Exception as e: + logger.error(f"Error in {self.name}: {str(e)}") + return { + "success": False, + "error": f"An unexpected error occurred: {str(e)}", + } diff --git a/skills/venice_image/image_vision/image_vision_base.py b/skills/venice_image/image_vision/image_vision_base.py new file mode 100644 index 00000000..bac8b9df --- /dev/null +++ b/skills/venice_image/image_vision/image_vision_base.py @@ -0,0 +1,17 @@ +from pydantic import Field + +# Import the generic base and shared input +from skills.venice_image.base import VeniceImageBaseTool + + +class VeniceImageVisionBaseTool(VeniceImageBaseTool): + """ + Base class for Venice AI *Image Vision* tools. + Inherits from VeniceAIBaseTool and handles specifics of the + /chat/completions endpoint. + """ + + name: str = Field(description="The unique name of the image vision tool.") + description: str = Field( + description="A description of what the image vision tool does." + ) diff --git a/skills/venice_image/image_vision/image_vision_input.py b/skills/venice_image/image_vision/image_vision_input.py new file mode 100644 index 00000000..72ba8e39 --- /dev/null +++ b/skills/venice_image/image_vision/image_vision_input.py @@ -0,0 +1,9 @@ +from pydantic import BaseModel, Field, HttpUrl + + +class VeniceImageVision(BaseModel): + """Input for the Image Vision tool.""" + + image_url: HttpUrl = Field( + description="The URL of the image to to be described by the Vision model. 
Must be a publicly accessible URL.", + ) diff --git a/skills/venice_image/input.py b/skills/venice_image/input.py deleted file mode 100644 index 16f487c9..00000000 --- a/skills/venice_image/input.py +++ /dev/null @@ -1,113 +0,0 @@ -from typing import Optional - -from pydantic import BaseModel, Field - -STYLE_PRESETS = [ - "3D Model", - "Analog Film", - "Anime", - "Cinematic", - "Comic Book", - "Craft Clay", - "Digital Art", - "Enhance", - "Fantasy Art", - "Isometric Style", - "Line Art", - "Lowpoly", - "Neon Punk", - "Origami", - "Photographic", - "Pixel Art", - "Texture", - "Advertising", - "Food Photography", - "Real Estate", - "Abstract", - "Cubist", - "Graffiti", - "Hyperrealism", - "Impressionist", - "Pointillism", - "Pop Art", - "Psychedelic", - "Renaissance", - "Steampunk", - "Surrealist", - "Typography", - "Watercolor", - "Fighting Game", - "GTA", - "Super Mario", - "Minecraft", - "Pokemon", - "Retro Arcade", - "Retro Game", - "RPG Fantasy Game", - "Strategy Game", - "Street Fighter", - "Legend of Zelda", - "Architectural", - "Disco", - "Dreamscape", - "Dystopian", - "Fairy Tale", - "Gothic", - "Grunge", - "Horror", - "Minimalist", - "Monochrome", - "Nautical", - "Space", - "Stained Glass", - "Techwear Fashion", - "Tribal", - "Zentangle", - "Collage", - "Flat Papercut", - "Kirigami", - "Paper Mache", - "Paper Quilling", - "Papercut Collage", - "Papercut Shadow Box", - "Stacked Papercut", - "Thick Layered Papercut", - "Alien", - "Film Noir", - "HDR", - "Long Exposure", - "Neon Noir", - "Silhouette", - "Tilt-Shift", -] - -STYLE_PRESETS_DESCRIPTION = ( - "Optional style preset to apply. Available options: " - + ", ".join([f"'{s}'" for s in STYLE_PRESETS]) - + ". Defaults to 'Photographic'." -) - - -class VeniceImageGenerationInput(BaseModel): - """Input for General Image Generation Input tool.""" - - prompt: str = Field( - description="Text prompt describing the image to generate.", - ) - negative_prompt: Optional[str] = Field( - None, - description="Negative prompt describing what to avoid in the generated image. If not provided, the default from the agent config will be used.", - ) - width: Optional[int] = Field( - default=1024, - le=2048, - description="Width of the generated image (up to 2048).", - ) - height: Optional[int] = Field( - default=1024, - le=2048, - description="Height of the generated image (up to 2048).", - ) - style_preset: Optional[str] = Field( - default="Photographic", description=STYLE_PRESETS_DESCRIPTION - ) diff --git a/skills/venice_image/schema.json b/skills/venice_image/schema.json index 2fc200c6..1ecbf7f9 100644 --- a/skills/venice_image/schema.json +++ b/skills/venice_image/schema.json @@ -20,6 +20,54 @@ "title": "Skill States", "description": "States for each Venice Image skill (disabled, public, or private)", "properties": { + "image_vision": { + "type": "string", + "title": "Image Vision", + "enum": [ + "disabled", + "public", + "private" + ], + "x-enum-title": [ + "Disabled", + "Agent Owner + All Users", + "Agent Owner Only" + ], + "description": "Describes an image provided via URL using the Venice AI API. 
Ideal for understanding the content of an existing image", "default": "public" }, + "image_enhance": { + "type": "string", + "title": "Image Enhance", + "enum": [ + "disabled", + "public", + "private" + ], + "x-enum-title": [ + "Disabled", + "Agent Owner + All Users", + "Agent Owner Only" + ], + "description": "Tool for **enhancing** (modifying specific areas of) an existing image using a selected image model via Venice AI", + "default": "public" + }, + "image_upscale": { + "type": "string", + "title": "Image Upscale", + "enum": [ + "disabled", + "public", + "private" + ], + "x-enum-title": [ + "Disabled", + "Agent Owner + All Users", + "Agent Owner Only" + ], + "description": "Upscale an existing image by 2x or 4x using Venice AI.", + "default": "disabled" + }, "image_generation_flux_dev": { "type": "string", "title": "Image Generation (Flux-Dev)", @@ -34,7 +82,7 @@ "Agent Owner Only" ], "description": "Generate images using Venice AI's Flux Dev model (research, art workflows).", - "default": "disabled" + "default": "public" }, "image_generation_flux_dev_uncensored": { "type": "string", @@ -137,13 +185,19 @@ "safe_mode": { "type": "boolean", "title": "Safe Mode", - "description": "If true, explicit images will be blurred.", + "description": "Whether to use safe mode. If enabled, this will blur images that are classified as having adult content", "default": true }, + "embed_exif_metadata": { + "type": "boolean", + "title": "Embed Exif Metadata", + "description": "Embed prompt generation information into the image's EXIF metadata", + "default": false + }, "hide_watermark": { "type": "boolean", "title": "Hide Watermark", - "description": "If true, the generated image will not have a watermark.", + "description": "Whether to hide the Venice watermark. Venice may ignore this parameter for certain generated content.", "default": true }, "negative_prompt": { diff --git a/skills/venice_image/utils.py b/skills/venice_image/utils.py new file mode 100644 index 00000000..4d8d921a --- /dev/null +++ b/skills/venice_image/utils.py @@ -0,0 +1,78 @@ +import base64 +import io +import logging +from typing import Optional + +import filetype +import httpx +from PIL import Image +from pydantic import HttpUrl + +from skills.base import ToolException + +logger = logging.getLogger(__name__) + + +async def fetch_image_as_bytes(image_url: HttpUrl) -> bytes: + """Fetches image bytes from a given URL. Converts unsupported formats to PNG using Pillow. + + Raises: + ToolException: If fetching or converting the image fails. + """ + try: + async with httpx.AsyncClient(timeout=90) as client: + response = await client.get(str(image_url), follow_redirects=True) + response.raise_for_status() + + original_bytes = response.content + + # Guess file type from content + kind = filetype.guess(original_bytes) + detected_ext = kind.extension if kind else None + detected_mime = kind.mime if kind else "unknown" + + if not detected_ext or not detected_mime.startswith("image/"): + msg = f"URL {image_url} did not return a recognizable image format. Detected: {detected_mime}" + logger.error(msg) + raise ToolException(msg) + + if detected_ext in ("jpg", "jpeg", "png"): + return original_bytes + + # Convert unsupported image to PNG + try: + img = Image.open(io.BytesIO(original_bytes)).convert("RGBA") + with io.BytesIO() as output: + img.save(output, format="PNG") + logger.info( + f"Converted unsupported image type '{detected_ext}' to PNG."
+ ) + return output.getvalue() + except Exception as e: + msg = f"Failed to convert image ({detected_ext}) to PNG: {e}" + logger.error(msg, exc_info=True) + raise ToolException(msg) from e + + except httpx.HTTPStatusError as e: + msg = f"HTTP error fetching image {image_url}: Status {e.response.status_code}" + logger.error(msg) + raise ToolException(msg) from e + except httpx.RequestError as e: + msg = f"Network error fetching image {image_url}: {e}" + logger.error(msg) + raise ToolException(msg) from e + except Exception as e: + msg = f"Unexpected error fetching image {image_url}: {e}" + logger.error(msg, exc_info=True) + raise ToolException(msg) from e + + +async def fetch_image_as_base64(image_url: HttpUrl) -> Optional[str]: + """Fetches an image from the URL and returns the image as a Base64-encoded string.""" + image_bytes = await fetch_image_as_bytes(image_url) + + if image_bytes is None: + return None + + # Convert image bytes to a Base64-encoded string + return base64.b64encode(image_bytes).decode("utf-8")
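To tie the helpers above together, here is a hedged sketch that reuses `fetch_image_as_base64` and rebuilds the payload `image_upscale.py` posts to `api/v1/image/upscale`; the image URL is illustrative, and the authenticated request itself is still handled by the shared base tool in real use:

```python
# Sketch only: reuses fetch_image_as_base64 above and mirrors the payload built
# in image_upscale.py; it does not perform the authenticated POST itself.
import asyncio

from skills.venice_image.utils import fetch_image_as_base64


async def build_upscale_payload(
    url: str, scale: int = 2, replication: float = 0.35
) -> dict:
    # fetch_image_as_base64 propagates ToolException if the URL is not a usable image
    image_base64 = await fetch_image_as_base64(url)
    return {"image": image_base64, "scale": scale, "replication": replication}


if __name__ == "__main__":
    payload = asyncio.run(
        build_upscale_payload("https://example.com/photo.jpg", scale=4, replication=0.5)
    )
    # Print the payload shape without dumping the full base64 string
    print({k: (len(v) if k == "image" else v) for k, v in payload.items()})
```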