From 118fa3f70342951a68e8246864b5729202726b99 Mon Sep 17 00:00:00 2001
From: Matteo Benedetto <me@enne2.net>
Date: Thu, 2 Oct 2025 16:53:56 +0200
Subject: [PATCH] Add save_path parameter to generate_image_dalle tool

- Modified generate_image_dalle to require save_path parameter
- Images are now downloaded and saved locally to specified path
- Added support for multiple images with automatic indexing
- Enhanced error handling for file operations
- Updated documentation and configuration files
- Added requests dependency for image downloading
---
 README.md                          |  24 +++++--
 image_recognition_server/server.py | 101 ++++++++++++++++++++++-------
 requirements.txt                   |   3 +-
 3 files changed, 98 insertions(+), 30 deletions(-)

diff --git a/README.md b/README.md
index 9ac0376..b063914 100644
--- a/README.md
+++ b/README.md
@@ -89,18 +89,31 @@ Ask a specific question about an image using AI vision.
 **Example usage:** "What color is the car in this image?", "How many people are in this photo?", "What text is visible in this image?"
 
 ### 4. generate_image_dalle
-Generate images using OpenAI's DALL-E API.
+Generate images using OpenAI's DALL-E 3 API and save them to a specified path.
 
 **Parameters:**
 - `prompt` (string, required): Description of the image to generate
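+- `save_path` (string, required): Path where the generated image(s) will be saved; relative paths are resolved to absolute paths, and `.png` is appended if the path has no `.png`/`.jpg`/`.jpeg` extension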
 - `size` (string, optional): Image size - options: "1024x1024", "1792x1024", "1024x1792" (default: "1024x1024")
 - `quality` (string, optional): Image quality - options: "standard", "hd" (default: "standard")
 - `style` (string, optional): Image style - options: "vivid", "natural" (default: "vivid")
 - `n` (integer, optional): Number of images to generate (1-10, default: 1)
 
-**Returns:** Generated image URLs and metadata
+**Returns:** Success message with saved file paths and image metadata
 
-**Example prompts:** "A futuristic city skyline at sunset", "A cute robot playing with a cat", "Abstract art with blue and gold colors"
+**Example usage:**
+- Generate single image: `prompt="A peaceful mountain landscape", save_path="/home/user/images/mountain.png"`
+- Generate multiple images: `prompt="Abstract art", save_path="/home/user/art/abstract.png", n=3` (saves as abstract_1.png, abstract_2.png, abstract_3.png)
+- High quality image: `prompt="Professional logo", save_path="/home/user/logo.png", quality="hd", size="1792x1024"`
+
+**Features:**
+- Automatically creates directories if they don't exist
+- Downloads and saves images locally from DALL-E URLs
+- Handles multiple images with automatic filename indexing
+- Checks that the target directory is writable before requesting the image
+- Reports file sizes and revised prompts
+
+**Note:** Requires an OpenAI API key with DALL-E 3 access. Generated images are saved locally; the image URLs returned by the API are temporary and are not included in the tool's response.
 
 ## Example Usage
 
@@ -120,8 +133,9 @@ What text can you read in /path/to/document.jpg?
 
 **Generate Images:**
 ```
-Generate an image: "A peaceful mountain landscape at sunrise"
-Create a high-quality image of "A futuristic robot in a cyberpunk city" in 1792x1024 size
+Generate an image: "A peaceful mountain landscape at sunrise" and save it to "/home/user/mountain.png"
+Create a high-quality image of "A futuristic robot in a cyberpunk city" in 1792x1024 size and save to "/home/user/robot.png"
+Generate 3 images of "Abstract geometric patterns" and save to "/home/user/patterns.png"
 ```
 
 The AI will use the appropriate tools (`describe_image_from_file`, `ask_image_question`, or `generate_image_dalle`) to provide detailed responses.
diff --git a/image_recognition_server/server.py b/image_recognition_server/server.py
index 8bac63e..8a2d69a 100644
--- a/image_recognition_server/server.py
+++ b/image_recognition_server/server.py
@@ -205,23 +205,28 @@ def ask_image_question(file_path: str, prompt: str) -> str:
         return f"Error processing image question: {str(e)}"
 
 @mcp.tool()
-def generate_image_dalle(prompt: str, size: str = "1024x1024", quality: str = "standard", style: str = "vivid", n: int = 1) -> str:
+def generate_image_dalle(prompt: str, save_path: str, size: str = "1024x1024", quality: str = "standard", style: str = "vivid", n: int = 1) -> str:
     """
-    Generate an image using DALL-E API
+    Generate an image using DALL-E API and save it to the specified path
     
     Args:
         prompt: Description of the image to generate
+        save_path: Absolute path where to save the generated image(s)
         size: Image size - options: "1024x1024", "1792x1024", "1024x1792" (default: "1024x1024")
         quality: Image quality - options: "standard", "hd" (default: "standard")
         style: Image style - options: "vivid", "natural" (default: "vivid")
         n: Number of images to generate (1-10, default: 1)
     
     Returns:
-        JSON response with generated image URLs and metadata
+        Success message with saved file paths and metadata
     """
+    import requests
+    from pathlib import Path
+    
     try:
         logger.debug(f"Generating image with DALL-E")
         logger.debug(f"Prompt: {prompt}")
+        logger.debug(f"Save path: {save_path}")
         logger.debug(f"Size: {size}, Quality: {quality}, Style: {style}, Count: {n}")
         
         # Validate parameters
@@ -240,6 +245,21 @@ def generate_image_dalle(prompt: str, size: str = "1024x1024", quality: str = "s
         if not (1 <= n <= 10):
             return "Error: Number of images must be between 1 and 10"
         
+        # Validate save path
+        try:
+            save_path = os.path.abspath(save_path)
+            save_dir = os.path.dirname(save_path)
+            
+            # Create directory if it doesn't exist
+            os.makedirs(save_dir, exist_ok=True)
+            
+            # Check if directory is writable
+            if not os.access(save_dir, os.W_OK):
+                return f"Error: Directory '{save_dir}' is not writable"
+                
+        except Exception as e:
+            return f"Error: Invalid save path '{save_path}': {str(e)}"
+        
         # Check if OpenAI is available
         if not HAS_OPENAI:
             return "Error: OpenAI API key not configured. Please set OPENAI_API_KEY to use DALL-E image generation."
@@ -255,30 +275,63 @@ def generate_image_dalle(prompt: str, size: str = "1024x1024", quality: str = "s
                 n=n
             )
             
-            # Format response
-            result = {
-                "prompt": prompt,
-                "parameters": {
-                    "size": size,
-                    "quality": quality,
-                    "style": style,
-                    "count": n
-                },
-                "images": []
-            }
+            saved_files = []
             
             for i, image_data in enumerate(response.data):
-                result["images"].append({
-                    "index": i + 1,
-                    "url": image_data.url,
-                    "revised_prompt": getattr(image_data, 'revised_prompt', None)
-                })
+                try:
+                    # Download the image
+                    image_response = requests.get(image_data.url, timeout=30)
+                    image_response.raise_for_status()
+                    
+                    # Determine file path for multiple images
+                    if n == 1:
+                        file_path = save_path
+                    else:
+                        # For multiple images, add index to filename
+                        path_obj = Path(save_path)
+                        file_path = str(path_obj.parent / f"{path_obj.stem}_{i+1}{path_obj.suffix}")
+                    
+                    # Ensure file has proper extension
+                    if not file_path.lower().endswith(('.png', '.jpg', '.jpeg')):
+                        file_path += '.png'
+                    
+                    # Save the image
+                    with open(file_path, 'wb') as f:
+                        f.write(image_response.content)
+                    
+                    saved_files.append({
+                        'index': i + 1,
+                        'path': file_path,
+                        'revised_prompt': getattr(image_data, 'revised_prompt', None),
+                        'size_bytes': len(image_response.content)
+                    })
+                    
+                    logger.info(f"Image {i+1} saved to: {file_path}")
+                    
+                except Exception as e:
+                    logger.error(f"Error saving image {i+1}: {str(e)}")
+                    return f"Error saving image {i+1}: {str(e)}"
+            
+            # Format success message
+            if len(saved_files) == 1:
+                file_info = saved_files[0]
+                message = f"Successfully generated and saved image:\n"
+                message += f"  Path: {file_info['path']}\n"
+                message += f"  Size: {file_info['size_bytes']:,} bytes\n"
+                if file_info['revised_prompt']:
+                    message += f"  Revised prompt: {file_info['revised_prompt']}\n"
+            else:
+                message = f"Successfully generated and saved {len(saved_files)} images:\n\n"
+                for file_info in saved_files:
+                    message += f"Image {file_info['index']}:\n"
+                    message += f"  Path: {file_info['path']}\n"
+                    message += f"  Size: {file_info['size_bytes']:,} bytes\n"
+                    if file_info['revised_prompt']:
+                        message += f"  Revised prompt: {file_info['revised_prompt']}\n"
+                    message += "\n"
             
-            logger.info(f"Successfully generated {len(response.data)} image(s)")
-            return f"Successfully generated {len(response.data)} image(s):\n\n" + "\n".join([
-                f"Image {img['index']}:\n  URL: {img['url']}\n  Revised prompt: {img['revised_prompt'] or 'N/A'}"
-                for img in result["images"]
-            ])
+            logger.info(f"Successfully generated and saved {len(saved_files)} image(s)")
+            return message.strip()
             
         except Exception as e:
             logger.error(f"DALL-E API error: {str(e)}", exc_info=True)
diff --git a/requirements.txt b/requirements.txt
index 7993c31..a36e372 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,4 +2,5 @@ fastmcp
 pillow
 python-multipart
 uvicorn
-openai
\ No newline at end of file
+openai
+requests
\ No newline at end of file