"""MCP server exposing image recognition and generation tools.

Tools use the OpenAI API (GPT-4o-mini vision and DALL-E 3) when
OPENAI_API_KEY is configured, and fall back to basic Pillow metadata
otherwise.
"""

import base64
import io
import logging
import os
import tempfile
from typing import Optional

from PIL import Image
from fastmcp import FastMCP
import openai

# Configure logging: honor MCP_LOG_DIR if set, otherwise use the system
# temporary directory so the server never requires a writable CWD.
log_dir = os.environ.get('MCP_LOG_DIR', tempfile.gettempdir())
log_file = os.path.join(log_dir, 'image_server.log')
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    filename=log_file
)
logger = logging.getLogger(__name__)

# Create FastMCP server instance
mcp = FastMCP("ImageRecognitionServer")

# Get OpenAI API key from environment; the placeholder value from a
# template config counts as "not configured".
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY', '')
HAS_OPENAI = bool(OPENAI_API_KEY and OPENAI_API_KEY != 'your-openai-api-key-here')

if HAS_OPENAI:
    openai.api_key = OPENAI_API_KEY
    logger.info("OpenAI API key configured - AI descriptions enabled")
else:
    logger.warning("No valid OpenAI API key - using basic image metadata only")


# Recognized file extensions mapped to MIME types; any other extension is
# treated as JPEG (same fallback the tools used individually before).
_EXTENSION_MIME_TYPES = {
    '.png': 'image/png',
    '.gif': 'image/gif',
    '.webp': 'image/webp',
}


def _mime_type_for(file_path: str) -> str:
    """Return the image MIME type inferred from *file_path*'s extension.

    Defaults to ``image/jpeg`` for unrecognized extensions.
    """
    lowered = file_path.lower()
    for ext, mime in _EXTENSION_MIME_TYPES.items():
        if lowered.endswith(ext):
            return mime
    return 'image/jpeg'


def _basic_metadata(image: Image.Image) -> str:
    """Return the shared human-readable metadata summary for *image*."""
    return (
        f"- Size: {image.size[0]}x{image.size[1]} pixels\n"
        f"- Mode: {image.mode}\n"
        f"- Format: {image.format or 'Unknown'}"
    )


@mcp.tool()
def describe_image(image_data: str, mime_type: str = 'image/jpeg') -> str:
    """
    Describe an image using base64 encoded image data

    Args:
        image_data: Base64 encoded image data
        mime_type: MIME type of the image (default: image/jpeg)

    Returns:
        Detailed description of the image
    """
    try:
        logger.debug(f"Describing image - MIME type: {mime_type}")

        # Decode base64 image; Pillow validates it is a readable image.
        image_bytes = base64.b64decode(image_data)
        image = Image.open(io.BytesIO(image_bytes))
        logger.info(f"Image size: {image.size}, mode: {image.mode}")

        # If OpenAI is available, use the Vision API.
        if HAS_OPENAI:
            try:
                response = openai.chat.completions.create(
                    model="gpt-4o-mini",
                    messages=[
                        {
                            "role": "user",
                            "content": [
                                {
                                    "type": "text",
                                    "text": "Describe this image in detail, including objects, colors, composition, and any text visible.",
                                },
                                {
                                    "type": "image_url",
                                    "image_url": {
                                        "url": f"data:{mime_type};base64,{image_data}"
                                    },
                                },
                            ],
                        }
                    ],
                    max_tokens=500,
                )
                description = response.choices[0].message.content
                logger.debug(f"OpenAI description: {description}")
                return description
            except Exception as e:
                # API failure is non-fatal: fall back to basic metadata.
                logger.error(f"OpenAI API error: {str(e)}", exc_info=True)
                return (
                    f"OpenAI API error: {str(e)}\n\n"
                    f"Basic metadata:\n{_basic_metadata(image)}"
                )

        # No OpenAI key: return basic metadata only.
        description = (
            f"Image Analysis (Basic Metadata):\n{_basic_metadata(image)}\n\n"
            "Note: For AI-powered descriptions, configure OPENAI_API_KEY in MCP settings."
        )
        logger.debug(f"Returning basic description: {description}")
        return description
    except Exception as e:
        logger.error(f"Error describing image: {str(e)}", exc_info=True)
        return f"Error describing image: {str(e)}"


@mcp.tool()
def describe_image_from_file(file_path: str) -> str:
    """
    Describe an image from a file path

    Args:
        file_path: Path to the image file

    Returns:
        Detailed description of the image
    """
    try:
        logger.debug(f"Describing image from file: {file_path}")

        # Read and base64-encode the image, then delegate to describe_image.
        with open(file_path, 'rb') as image_file:
            image_data = base64.b64encode(image_file.read()).decode('utf-8')

        return describe_image(image_data, _mime_type_for(file_path))
    except Exception as e:
        logger.error(f"Error reading image file: {str(e)}", exc_info=True)
        return f"Error reading image file: {str(e)}"


@mcp.tool()
def ask_image_question(file_path: str, prompt: str) -> str:
    """
    Ask a specific question about an image using AI vision

    Args:
        file_path: Path to the image file
        prompt: The question or instruction about the image

    Returns:
        AI response to the question about the image
    """
    try:
        logger.debug(f"Asking question about image: {file_path}")
        logger.debug(f"Question: {prompt}")

        # Read the file once; encode for the API and keep the raw bytes for
        # the metadata fallback (avoids re-decoding the base64 we just made).
        with open(file_path, 'rb') as image_file:
            image_bytes = image_file.read()
        image_data = base64.b64encode(image_bytes).decode('utf-8')
        mime_type = _mime_type_for(file_path)

        # Load image for basic metadata fallback (also validates the file).
        image = Image.open(io.BytesIO(image_bytes))

        # If OpenAI is available, use the Vision API with the custom prompt.
        if HAS_OPENAI:
            try:
                response = openai.chat.completions.create(
                    model="gpt-4o-mini",
                    messages=[
                        {
                            "role": "user",
                            "content": [
                                {"type": "text", "text": prompt},
                                {
                                    "type": "image_url",
                                    "image_url": {
                                        "url": f"data:{mime_type};base64,{image_data}"
                                    },
                                },
                            ],
                        }
                    ],
                    max_tokens=500,
                )
                answer = response.choices[0].message.content
                logger.debug(f"OpenAI response: {answer}")
                return answer
            except Exception as e:
                logger.error(f"OpenAI API error: {str(e)}", exc_info=True)
                return (
                    f"OpenAI API error: {str(e)}\n\n"
                    "Note: Configure OPENAI_API_KEY for AI-powered image analysis."
                )

        # No OpenAI key: report availability plus basic metadata.
        return (
            "AI image analysis not available. Please configure OPENAI_API_KEY.\n\n"
            f"Image metadata:\n{_basic_metadata(image)}"
        )
    except Exception as e:
        logger.error(f"Error processing image question: {str(e)}", exc_info=True)
        return f"Error processing image question: {str(e)}"


@mcp.tool()
def generate_image_dalle(prompt: str, save_path: str, size: str = "1024x1024",
                         quality: str = "standard", style: str = "vivid",
                         n: int = 1) -> str:
    """
    Generate an image using DALL-E API and save it to the specified path

    Args:
        prompt: Description of the image to generate
        save_path: Absolute path where to save the generated image(s)
        size: Image size - options: "1024x1024", "1792x1024", "1024x1792" (default: "1024x1024")
        quality: Image quality - options: "standard", "hd" (default: "standard")
        style: Image style - options: "vivid", "natural" (default: "vivid")
        n: Number of images to generate (1-10, default: 1)

    Returns:
        Success message with saved file paths and metadata
    """
    import requests
    from pathlib import Path

    try:
        logger.debug("Generating image with DALL-E")
        logger.debug(f"Prompt: {prompt}")
        logger.debug(f"Save path: {save_path}")
        logger.debug(f"Size: {size}, Quality: {quality}, Style: {style}, Count: {n}")

        # Validate parameters before spending an API call.
        valid_sizes = ["1024x1024", "1792x1024", "1024x1792"]
        if size not in valid_sizes:
            return f"Error: Invalid size '{size}'. Valid options: {', '.join(valid_sizes)}"

        valid_qualities = ["standard", "hd"]
        if quality not in valid_qualities:
            return f"Error: Invalid quality '{quality}'. Valid options: {', '.join(valid_qualities)}"

        valid_styles = ["vivid", "natural"]
        if style not in valid_styles:
            return f"Error: Invalid style '{style}'. Valid options: {', '.join(valid_styles)}"

        # NOTE(review): the dall-e-3 model only accepts n=1 per request; for
        # n>1 the API call below will fail and the error is returned to the
        # caller. Verify against the OpenAI Images API before relying on n>1.
        if not (1 <= n <= 10):
            return "Error: Number of images must be between 1 and 10"

        # Validate and prepare the save path.
        try:
            save_path = os.path.abspath(save_path)
            save_dir = os.path.dirname(save_path)
            # Create directory if it doesn't exist
            os.makedirs(save_dir, exist_ok=True)
            # Check if directory is writable
            if not os.access(save_dir, os.W_OK):
                return f"Error: Directory '{save_dir}' is not writable"
        except Exception as e:
            return f"Error: Invalid save path '{save_path}': {str(e)}"

        # Check if OpenAI is available
        if not HAS_OPENAI:
            return "Error: OpenAI API key not configured. Please set OPENAI_API_KEY to use DALL-E image generation."

        try:
            # Generate image using DALL-E 3
            response = openai.images.generate(
                model="dall-e-3",
                prompt=prompt,
                size=size,
                quality=quality,
                style=style,
                n=n,
            )

            saved_files = []
            for i, image_data in enumerate(response.data):
                try:
                    # Download the image from the short-lived URL.
                    image_response = requests.get(image_data.url, timeout=30)
                    image_response.raise_for_status()

                    # For multiple images, add an index to the filename.
                    if n == 1:
                        file_path = save_path
                    else:
                        path_obj = Path(save_path)
                        file_path = str(path_obj.parent / f"{path_obj.stem}_{i+1}{path_obj.suffix}")

                    # Ensure file has a proper image extension.
                    if not file_path.lower().endswith(('.png', '.jpg', '.jpeg')):
                        file_path += '.png'

                    # Save the image
                    with open(file_path, 'wb') as f:
                        f.write(image_response.content)

                    saved_files.append({
                        'index': i + 1,
                        'path': file_path,
                        'revised_prompt': getattr(image_data, 'revised_prompt', None),
                        'size_bytes': len(image_response.content),
                    })
                    logger.info(f"Image {i+1} saved to: {file_path}")
                except Exception as e:
                    logger.error(f"Error saving image {i+1}: {str(e)}")
                    return f"Error saving image {i+1}: {str(e)}"

            # Format success message
            if len(saved_files) == 1:
                file_info = saved_files[0]
                message = "Successfully generated and saved image:\n"
                message += f"  Path: {file_info['path']}\n"
                message += f"  Size: {file_info['size_bytes']:,} bytes\n"
                if file_info['revised_prompt']:
                    message += f"  Revised prompt: {file_info['revised_prompt']}\n"
            else:
                message = f"Successfully generated and saved {len(saved_files)} images:\n\n"
                for file_info in saved_files:
                    message += f"Image {file_info['index']}:\n"
                    message += f"  Path: {file_info['path']}\n"
                    message += f"  Size: {file_info['size_bytes']:,} bytes\n"
                    if file_info['revised_prompt']:
                        message += f"  Revised prompt: {file_info['revised_prompt']}\n"
                    message += "\n"

            logger.info(f"Successfully generated and saved {len(saved_files)} image(s)")
            return message.strip()
        except Exception as e:
            logger.error(f"DALL-E API error: {str(e)}", exc_info=True)
            return f"DALL-E API error: {str(e)}"
    except Exception as e:
        logger.error(f"Error generating image: {str(e)}", exc_info=True)
        return f"Error generating image: {str(e)}"


def main():
    """Main entry point for the MCP server."""
    logger.info("Starting MCP Image Recognition Server")
    mcp.run()


if __name__ == "__main__":
    main()