Add save_path parameter to generate_image_dalle tool

- Modified generate_image_dalle to require a save_path parameter
- Images are now downloaded and saved locally to the specified path
- Added support for multiple images with automatic indexing
- Enhanced error handling for file operations
- Updated documentation and configuration files
- Added requests dependency for image downloading
3 months ago · 118fa3f703
3 changed files with 98 additions and 30 deletions
--- a/README.md
+++ b/README.md
@@ -89,18 +89,31 @@ Ask a specific question about an image using AI vision.
 **Example usage:** "What color is the car in this image?", "How many people are in this photo?", "What text is visible in this image?"
 ### 4. generate_image_dalle
-Generate images using OpenAI's DALL-E API.
+Generate images using OpenAI's DALL-E 3 API and save them to a specified path.
 **Parameters:**
 - `prompt` (string, required): Description of the image to generate
 - `save_path` (string, required): Absolute path where the generated image(s) will be saved
 - `size` (string, optional): Image size - options: "1024x1024", "1792x1024", "1024x1792" (default: "1024x1024")
 - `quality` (string, optional): Image quality - options: "standard", "hd" (default: "standard")
 - `style` (string, optional): Image style - options: "vivid", "natural" (default: "vivid")
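 - `n` (integer, optional): Number of images to generate (1-10, default: 1). Note: the DALL-E 3 model itself accepts only `n=1` per request, so values above 1 may be rejected by the API.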
-**Returns:** Generated image URLs and metadata
+**Returns:** Success message with saved file paths and image metadata
-**Example prompts:** "A futuristic city skyline at sunset", "A cute robot playing with a cat", "Abstract art with blue and gold colors"
+**Example usage:**
 - Generate single image: `prompt="A peaceful mountain landscape", save_path="/home/user/images/mountain.png"`
 - Generate multiple images: `prompt="Abstract art", save_path="/home/user/art/abstract.png", n=3` (saves as abstract_1.png, abstract_2.png, abstract_3.png)
 - High quality image: `prompt="Professional logo", save_path="/home/user/logo.png", quality="hd", size="1792x1024"`
 **Features:**
 - Automatically creates directories if they don't exist
 - Downloads and saves images locally from DALL-E URLs
 - Handles multiple images with automatic filename indexing
 - Validates file paths and permissions
 - Reports file sizes and revised prompts
 **Note:** Requires an OpenAI API key with DALL-E 3 access. Generated images are saved locally; the URLs returned by the API are temporary.
 ## Example Usage
@@ -120,8 +133,9 @@ What text can you read in /path/to/document.jpg?
 **Generate Images:**
 ```
-Generate an image: "A peaceful mountain landscape at sunrise"
+Generate an image: "A peaceful mountain landscape at sunrise" and save it to "/home/user/mountain.png"
-Create a high-quality image of "A futuristic robot in a cyberpunk city" in 1792x1024 size
+Create a high-quality image of "A futuristic robot in a cyberpunk city" in 1792x1024 size and save to "/home/user/robot.png"
 Generate 3 images of "Abstract geometric patterns" and save to "/home/user/patterns.png"
 ```
 The AI will use the appropriate tools (`describe_image_from_file`, `ask_image_question`, or `generate_image_dalle`) to provide detailed responses.
--- a/image_recognition_server/server.py
+++ b/image_recognition_server/server.py
@@ -205,23 +205,28 @@ def ask_image_question(file_path: str, prompt: str) -> str:
        return f"Error processing image question: {str(e)}"
@mcp.tool()
-def generate_image_dalle(prompt: str, size: str = "1024x1024", quality: str = "standard", style: str = "vivid", n: int = 1) -> str:
+def generate_image_dalle(prompt: str, save_path: str, size: str = "1024x1024", quality: str = "standard", style: str = "vivid", n: int = 1) -> str:
    """
-    Generate an image using DALL-E API
+    Generate an image using DALL-E API and save it to the specified path
    Args:
        prompt: Description of the image to generate
        save_path: Absolute path where to save the generated image(s)
        size: Image size - options: "1024x1024", "1792x1024", "1024x1792" (default: "1024x1024")
        quality: Image quality - options: "standard", "hd" (default: "standard")
        style: Image style - options: "vivid", "natural" (default: "vivid")
        n: Number of images to generate (1-10, default: 1)
    Returns:
-        JSON response with generated image URLs and metadata
+        Success message with saved file paths and metadata
    """
    import requests
    from pathlib import Path
    try:
        logger.debug(f"Generating image with DALL-E")
        logger.debug(f"Prompt: {prompt}")
        logger.debug(f"Save path: {save_path}")
        logger.debug(f"Size: {size}, Quality: {quality}, Style: {style}, Count: {n}")
        # Validate parameters
@@ -240,6 +245,21 @@ def generate_image_dalle(prompt: str, size: str = "1024x1024", quality: str = "s
        if not (1 <= n <= 10):
            return "Error: Number of images must be between 1 and 10"
        # Validate save path
        try:
            save_path = os.path.abspath(save_path)
            save_dir = os.path.dirname(save_path)
            # Create directory if it doesn't exist
            os.makedirs(save_dir, exist_ok=True)
            # Check if directory is writable
            if not os.access(save_dir, os.W_OK):
                return f"Error: Directory '{save_dir}' is not writable"
        except Exception as e:
            return f"Error: Invalid save path '{save_path}': {str(e)}"
        # Check if OpenAI is available
        if not HAS_OPENAI:
            return "Error: OpenAI API key not configured. Please set OPENAI_API_KEY to use DALL-E image generation."
@@ -255,30 +275,63 @@ def generate_image_dalle(prompt: str, size: str = "1024x1024", quality: str = "s
                n=n
            )
-            # Format response
+            saved_files = []
            result = {
                "prompt": prompt,
                "parameters": {
                    "size": size,
                    "quality": quality,
                    "style": style,
                    "count": n
                },
                "images": []
            }
            for i, image_data in enumerate(response.data):
-                result["images"].append({
+                try:
-                    "index": i + 1,
+                    # Download the image
-                    "url": image_data.url,
+                    image_response = requests.get(image_data.url, timeout=30)
-                    "revised_prompt": getattr(image_data, 'revised_prompt', None)
+                    image_response.raise_for_status()
                    # Determine file path for multiple images
                    if n == 1:
                        file_path = save_path
                    else:
                        # For multiple images, add index to filename
                        path_obj = Path(save_path)
                        file_path = str(path_obj.parent / f"{path_obj.stem}_{i+1}{path_obj.suffix}")
                    # Ensure file has proper extension
                    if not file_path.lower().endswith(('.png', '.jpg', '.jpeg')):
                        file_path += '.png'
                    # Save the image
                    with open(file_path, 'wb') as f:
                        f.write(image_response.content)
                    saved_files.append({
                        'index': i + 1,
                        'path': file_path,
                        'revised_prompt': getattr(image_data, 'revised_prompt', None),
                        'size_bytes': len(image_response.content)
                    })
-            logger.info(f"Successfully generated {len(response.data)} image(s)")
+                    logger.info(f"Image {i+1} saved to: {file_path}")
-            return f"Successfully generated {len(response.data)} image(s):\n\n" + "\n".join([
+                    
-                f"Image {img['index']}:\n  URL: {img['url']}\n  Revised prompt: {img['revised_prompt'] or 'N/A'}"
+                except Exception as e:
-                for img in result["images"]
+                    logger.error(f"Error saving image {i+1}: {str(e)}")
-            ])
+                    return f"Error saving image {i+1}: {str(e)}"
            # Format success message
            if len(saved_files) == 1:
                file_info = saved_files[0]
                message = f"Successfully generated and saved image:\n"
                message += f"  Path: {file_info['path']}\n"
                message += f"  Size: {file_info['size_bytes']:,} bytes\n"
                if file_info['revised_prompt']:
                    message += f"  Revised prompt: {file_info['revised_prompt']}\n"
            else:
                message = f"Successfully generated and saved {len(saved_files)} images:\n\n"
                for file_info in saved_files:
                    message += f"Image {file_info['index']}:\n"
                    message += f"  Path: {file_info['path']}\n"
                    message += f"  Size: {file_info['size_bytes']:,} bytes\n"
                    if file_info['revised_prompt']:
                        message += f"  Revised prompt: {file_info['revised_prompt']}\n"
                    message += "\n"
            logger.info(f"Successfully generated and saved {len(saved_files)} image(s)")
            return message.strip()
        except Exception as e:
            logger.error(f"DALL-E API error: {str(e)}", exc_info=True)
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,4 @@ pillow
 python-multipart
 uvicorn
 openai
 requests