"""MCP server exposing image-description tools over FastMCP.

Provides two tools: ``describe_image`` (base64 payload) and
``describe_image_from_file`` (filesystem path).  When a valid
``OPENAI_API_KEY`` is present, descriptions come from the GPT-4o-mini
vision model; otherwise only basic Pillow metadata is returned.
"""

import base64
import io
import logging
import os

from PIL import Image
from fastmcp import FastMCP
import openai

# Log destination is overridable via IMAGE_SERVER_LOG; the default keeps the
# original hard-coded location for backward compatibility.
LOG_FILE = os.getenv(
    'IMAGE_SERVER_LOG',
    '/home/enne2/Sviluppo/tetris-sdl/mcp-image-server/image_server.log',
)

# Configure logging
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    filename=LOG_FILE,
)
logger = logging.getLogger(__name__)

# Create FastMCP server instance
mcp = FastMCP("ImageRecognitionServer")

# Get OpenAI API key from environment; the sample-config placeholder value
# counts as "not configured".
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY', '')
HAS_OPENAI = bool(OPENAI_API_KEY and OPENAI_API_KEY != 'your-openai-api-key-here')

if HAS_OPENAI:
    openai.api_key = OPENAI_API_KEY
    logger.info("OpenAI API key configured - AI descriptions enabled")
else:
    logger.warning("No valid OpenAI API key - using basic image metadata only")

# File-extension -> MIME-type map for describe_image_from_file; anything not
# listed falls back to image/jpeg (same behavior as the original chain).
_EXTENSION_MIME = {
    '.png': 'image/png',
    '.gif': 'image/gif',
    '.webp': 'image/webp',
}


def _describe_image_impl(image_data: str, mime_type: str) -> str:
    """Shared implementation behind both MCP tools.

    Kept as a plain (undecorated) function because ``@mcp.tool()`` wraps the
    decorated function in a tool object; under fastmcp 2.x that object is not
    directly callable from Python code, so tools must not call each other.

    Args:
        image_data: Base64 encoded image data.
        mime_type: MIME type of the image.

    Returns:
        An AI-generated description when OpenAI is configured, otherwise
        basic Pillow metadata; on failure, an error string (never raises).
    """
    try:
        logger.debug("Describing image - MIME type: %s", mime_type)

        # Decode base64 image and open with Pillow to validate it and to
        # collect metadata for the fallback path.
        image_bytes = base64.b64decode(image_data)
        image = Image.open(io.BytesIO(image_bytes))
        logger.info("Image size: %s, mode: %s", image.size, image.mode)

        # If OpenAI is available, use the Vision API.
        if HAS_OPENAI:
            try:
                response = openai.chat.completions.create(
                    model="gpt-4o-mini",
                    messages=[
                        {
                            "role": "user",
                            "content": [
                                {
                                    "type": "text",
                                    "text": "Describe this image in detail, including objects, colors, composition, and any text visible.",
                                },
                                {
                                    "type": "image_url",
                                    "image_url": {
                                        "url": f"data:{mime_type};base64,{image_data}"
                                    },
                                },
                            ],
                        }
                    ],
                    max_tokens=500,
                )
                description = response.choices[0].message.content
                logger.debug("OpenAI description: %s", description)
                return description
            except Exception as e:
                # API failure is non-fatal: fall back to basic metadata so the
                # caller still gets something useful.
                logger.error("OpenAI API error: %s", e, exc_info=True)
                return f"OpenAI API error: {str(e)}\n\nBasic metadata:\n- Size: {image.size[0]}x{image.size[1]} pixels\n- Mode: {image.mode}\n- Format: {image.format or 'Unknown'}"

        # No OpenAI key: return basic metadata only.
        description = f"Image Analysis (Basic Metadata):\n- Size: {image.size[0]}x{image.size[1]} pixels\n- Mode: {image.mode}\n- Format: {image.format or 'Unknown'}\n\nNote: For AI-powered descriptions, configure OPENAI_API_KEY in MCP settings."
        logger.debug("Returning basic description: %s", description)
        return description

    except Exception as e:
        # Tool contract: report errors as strings rather than raising.
        logger.error("Error describing image: %s", e, exc_info=True)
        return f"Error describing image: {str(e)}"


@mcp.tool()
def describe_image(image_data: str, mime_type: str = 'image/jpeg') -> str:
    """
    Describe an image using base64 encoded image data

    Args:
        image_data: Base64 encoded image data
        mime_type: MIME type of the image (default: image/jpeg)

    Returns:
        Detailed description of the image
    """
    return _describe_image_impl(image_data, mime_type)


@mcp.tool()
def describe_image_from_file(file_path: str) -> str:
    """
    Describe an image from a file path

    Args:
        file_path: Path to the image file

    Returns:
        Detailed description of the image
    """
    try:
        logger.debug("Describing image from file: %s", file_path)

        # Read and base64-encode the file up front; the file handle is closed
        # before any (potentially slow) API call is made.
        with open(file_path, 'rb') as image_file:
            image_data = base64.b64encode(image_file.read()).decode('utf-8')

        # Determine MIME type from the file extension (case-insensitive).
        extension = os.path.splitext(file_path)[1].lower()
        mime_type = _EXTENSION_MIME.get(extension, 'image/jpeg')

        return _describe_image_impl(image_data, mime_type)

    except Exception as e:
        logger.error("Error reading image file: %s", e, exc_info=True)
        return f"Error reading image file: {str(e)}"


def main():
    """Main entry point for the MCP server."""
    logger.info("Starting MCP Image Recognition Server")
    mcp.run()


if __name__ == "__main__":
    main()