From 118fa3f70342951a68e8246864b5729202726b99 Mon Sep 17 00:00:00 2001 From: Matteo Benedetto Date: Thu, 2 Oct 2025 16:53:56 +0200 Subject: [PATCH] Add save_path parameter to generate_image_dalle tool - Modified generate_image_dalle to require save_path parameter - Images are now downloaded and saved locally to specified path - Added support for multiple images with automatic indexing - Enhanced error handling for file operations - Updated documentation and configuration files - Added requests dependency for image downloading --- README.md | 24 +++++-- image_recognition_server/server.py | 101 ++++++++++++++++++++++------- requirements.txt | 3 +- 3 files changed, 98 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index 9ac0376..b063914 100644 --- a/README.md +++ b/README.md @@ -89,18 +89,31 @@ Ask a specific question about an image using AI vision. **Example usage:** "What color is the car in this image?", "How many people are in this photo?", "What text is visible in this image?" ### 4. generate_image_dalle -Generate images using OpenAI's DALL-E API. +Generate images using OpenAI's DALL-E 3 API and save them to a specified path. 
**Parameters:** - `prompt` (string, required): Description of the image to generate +- `save_path` (string, required): Absolute path where the generated image(s) will be saved - `size` (string, optional): Image size - options: "1024x1024", "1792x1024", "1024x1792" (default: "1024x1024") - `quality` (string, optional): Image quality - options: "standard", "hd" (default: "standard") - `style` (string, optional): Image style - options: "vivid", "natural" (default: "vivid") - `n` (integer, optional): Number of images to generate (1-10, default: 1) -**Returns:** Generated image URLs and metadata +**Returns:** Success message with saved file paths and image metadata -**Example prompts:** "A futuristic city skyline at sunset", "A cute robot playing with a cat", "Abstract art with blue and gold colors" +**Example usage:** +- Generate a single image: `prompt="A peaceful mountain landscape", save_path="/home/user/images/mountain.png"` +- Generate multiple images: `prompt="Abstract art", save_path="/home/user/art/abstract.png", n=3` (saves as abstract_1.png, abstract_2.png, abstract_3.png) +- High-quality image: `prompt="Professional logo", save_path="/home/user/logo.png", quality="hd", size="1792x1024"` + +**Features:** +- Automatically creates directories if they don't exist +- Downloads and saves images locally from DALL-E URLs +- Handles multiple images with automatic filename indexing +- Validates file paths and permissions +- Reports file sizes and revised prompts + +**Note:** Requires an OpenAI API key with DALL-E 3 access. Generated images are saved locally; the URLs returned by DALL-E are temporary. ## Example Usage @@ -120,8 +133,9 @@ What text can you read in /path/to/document.jpg? 
**Generate Images:** ``` -Generate an image: "A peaceful mountain landscape at sunrise" -Create a high-quality image of "A futuristic robot in a cyberpunk city" in 1792x1024 size +Generate an image: "A peaceful mountain landscape at sunrise" and save it to "/home/user/mountain.png" +Create a high-quality image of "A futuristic robot in a cyberpunk city" in 1792x1024 size and save to "/home/user/robot.png" +Generate 3 images of "Abstract geometric patterns" and save to "/home/user/patterns.png" ``` The AI will use the appropriate tools (`describe_image_from_file`, `ask_image_question`, or `generate_image_dalle`) to provide detailed responses. diff --git a/image_recognition_server/server.py b/image_recognition_server/server.py index 8bac63e..8a2d69a 100644 --- a/image_recognition_server/server.py +++ b/image_recognition_server/server.py @@ -205,23 +205,28 @@ def ask_image_question(file_path: str, prompt: str) -> str: return f"Error processing image question: {str(e)}" @mcp.tool() -def generate_image_dalle(prompt: str, size: str = "1024x1024", quality: str = "standard", style: str = "vivid", n: int = 1) -> str: +def generate_image_dalle(prompt: str, save_path: str, size: str = "1024x1024", quality: str = "standard", style: str = "vivid", n: int = 1) -> str: """ - Generate an image using DALL-E API + Generate an image using DALL-E API and save it to the specified path Args: prompt: Description of the image to generate + save_path: Absolute path where to save the generated image(s) size: Image size - options: "1024x1024", "1792x1024", "1024x1792" (default: "1024x1024") quality: Image quality - options: "standard", "hd" (default: "standard") style: Image style - options: "vivid", "natural" (default: "vivid") n: Number of images to generate (1-10, default: 1) Returns: - JSON response with generated image URLs and metadata + Success message with saved file paths and metadata """ + import requests + from pathlib import Path + try: logger.debug(f"Generating image with 
DALL-E") logger.debug(f"Prompt: {prompt}") + logger.debug(f"Save path: {save_path}") logger.debug(f"Size: {size}, Quality: {quality}, Style: {style}, Count: {n}") # Validate parameters @@ -240,6 +245,21 @@ def generate_image_dalle(prompt: str, size: str = "1024x1024", quality: str = "s if not (1 <= n <= 10): return "Error: Number of images must be between 1 and 10" + # Validate save path + try: + save_path = os.path.abspath(save_path) + save_dir = os.path.dirname(save_path) + + # Create directory if it doesn't exist + os.makedirs(save_dir, exist_ok=True) + + # Check if directory is writable + if not os.access(save_dir, os.W_OK): + return f"Error: Directory '{save_dir}' is not writable" + + except Exception as e: + return f"Error: Invalid save path '{save_path}': {str(e)}" + # Check if OpenAI is available if not HAS_OPENAI: return "Error: OpenAI API key not configured. Please set OPENAI_API_KEY to use DALL-E image generation." @@ -255,30 +275,63 @@ def generate_image_dalle(prompt: str, size: str = "1024x1024", quality: str = "s n=n ) - # Format response - result = { - "prompt": prompt, - "parameters": { - "size": size, - "quality": quality, - "style": style, - "count": n - }, - "images": [] - } + saved_files = [] for i, image_data in enumerate(response.data): - result["images"].append({ - "index": i + 1, - "url": image_data.url, - "revised_prompt": getattr(image_data, 'revised_prompt', None) - }) + try: + # Download the image + image_response = requests.get(image_data.url, timeout=30) + image_response.raise_for_status() + + # Determine file path for multiple images + if n == 1: + file_path = save_path + else: + # For multiple images, add index to filename + path_obj = Path(save_path) + file_path = str(path_obj.parent / f"{path_obj.stem}_{i+1}{path_obj.suffix}") + + # Ensure file has proper extension + if not file_path.lower().endswith(('.png', '.jpg', '.jpeg')): + file_path += '.png' + + # Save the image + with open(file_path, 'wb') as f: + 
f.write(image_response.content) + + saved_files.append({ + 'index': i + 1, + 'path': file_path, + 'revised_prompt': getattr(image_data, 'revised_prompt', None), + 'size_bytes': len(image_response.content) + }) + + logger.info(f"Image {i+1} saved to: {file_path}") + + except Exception as e: + logger.error(f"Error saving image {i+1}: {str(e)}") + return f"Error saving image {i+1}: {str(e)}" + + # Format success message + if len(saved_files) == 1: + file_info = saved_files[0] + message = f"Successfully generated and saved image:\n" + message += f" Path: {file_info['path']}\n" + message += f" Size: {file_info['size_bytes']:,} bytes\n" + if file_info['revised_prompt']: + message += f" Revised prompt: {file_info['revised_prompt']}\n" + else: + message = f"Successfully generated and saved {len(saved_files)} images:\n\n" + for file_info in saved_files: + message += f"Image {file_info['index']}:\n" + message += f" Path: {file_info['path']}\n" + message += f" Size: {file_info['size_bytes']:,} bytes\n" + if file_info['revised_prompt']: + message += f" Revised prompt: {file_info['revised_prompt']}\n" + message += "\n" - logger.info(f"Successfully generated {len(response.data)} image(s)") - return f"Successfully generated {len(response.data)} image(s):\n\n" + "\n".join([ - f"Image {img['index']}:\n URL: {img['url']}\n Revised prompt: {img['revised_prompt'] or 'N/A'}" - for img in result["images"] - ]) + logger.info(f"Successfully generated and saved {len(saved_files)} image(s)") + return message.strip() except Exception as e: logger.error(f"DALL-E API error: {str(e)}", exc_info=True) diff --git a/requirements.txt b/requirements.txt index 7993c31..a36e372 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,5 @@ fastmcp pillow python-multipart uvicorn -openai \ No newline at end of file +openai +requests \ No newline at end of file