"""MCP image recognition server.

Exposes FastMCP tools that describe images either via the OpenAI Vision API
(when an OPENAI_API_KEY is configured) or via basic Pillow metadata.
"""
import base64
import io
import logging
import os
from typing import Optional
from PIL import Image
from fastmcp import FastMCP
import openai
# Log to a file (not stdout) so the MCP stdio transport stays clean.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    filename='/home/enne2/Sviluppo/tetris-sdl/mcp-image-server/image_server.log'
)
logger = logging.getLogger(__name__)

# The FastMCP server instance; tools below register themselves on it.
mcp = FastMCP("ImageRecognitionServer")

# Read the OpenAI API key from the environment. The placeholder value from
# the sample config counts as "no key".
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY', '')
HAS_OPENAI = bool(OPENAI_API_KEY and OPENAI_API_KEY != 'your-openai-api-key-here')

if HAS_OPENAI:
    openai.api_key = OPENAI_API_KEY
    logger.info("OpenAI API key configured - AI descriptions enabled")
else:
    logger.warning("No valid OpenAI API key - using basic image metadata only")
@mcp.tool()
def describe_image(image_data: str, mime_type: str = 'image/jpeg') -> str:
    """
    Describe an image using base64 encoded image data.

    Args:
        image_data: Base64 encoded image data
        mime_type: MIME type of the image (default: image/jpeg)

    Returns:
        An AI-generated description when OpenAI is configured, otherwise
        basic Pillow metadata; on failure, an error message string.
    """
    try:
        logger.debug("Describing image - MIME type: %s", mime_type)
        # Decode and parse the image up front so invalid data fails fast and
        # metadata is available for both fallback paths.
        image_bytes = base64.b64decode(image_data)
        image = Image.open(io.BytesIO(image_bytes))
        logger.info("Image size: %s, mode: %s", image.size, image.mode)

        # Metadata snippet shared by the API-error fallback and the no-key path.
        metadata = (
            f"- Size: {image.size[0]}x{image.size[1]} pixels\n"
            f"- Mode: {image.mode}\n"
            f"- Format: {image.format or 'Unknown'}"
        )

        # If OpenAI is available, use the Vision API.
        if HAS_OPENAI:
            try:
                response = openai.chat.completions.create(
                    model="gpt-4o-mini",
                    messages=[
                        {
                            "role": "user",
                            "content": [
                                {"type": "text", "text": "Describe this image in detail, including objects, colors, composition, and any text visible."},
                                {
                                    "type": "image_url",
                                    "image_url": {
                                        "url": f"data:{mime_type};base64,{image_data}"
                                    }
                                }
                            ]
                        }
                    ],
                    max_tokens=500
                )
                # message.content may be None (e.g. refusals); the tool is
                # declared -> str, so never return None.
                description = response.choices[0].message.content or ""
                logger.debug("OpenAI description: %s", description)
                return description
            except Exception as e:
                logger.error("OpenAI API error: %s", e, exc_info=True)
                # Fall back to basic metadata rather than failing the tool call.
                return f"OpenAI API error: {str(e)}\n\nBasic metadata:\n{metadata}"

        # No OpenAI key configured: return basic metadata only.
        description = (
            f"Image Analysis (Basic Metadata):\n{metadata}\n\n"
            "Note: For AI-powered descriptions, configure OPENAI_API_KEY in MCP settings."
        )
        logger.debug("Returning basic description: %s", description)
        return description
    except Exception as e:
        logger.error("Error describing image: %s", e, exc_info=True)
        return f"Error describing image: {str(e)}"
@mcp.tool()
def describe_image_from_file(file_path: str) -> str:
    """
    Describe an image from a file path.

    Args:
        file_path: Path to the image file

    Returns:
        Detailed description of the image, or an error message string.
    """
    # Extension -> MIME type; anything unrecognized defaults to JPEG,
    # matching the original if/elif chain's behavior.
    mime_by_ext = {
        '.png': 'image/png',
        '.gif': 'image/gif',
        '.webp': 'image/webp',
    }
    try:
        logger.debug("Describing image from file: %s", file_path)
        # Read and encode, closing the file before the (potentially slow)
        # downstream description call.
        with open(file_path, 'rb') as image_file:
            image_data = base64.b64encode(image_file.read()).decode('utf-8')

        ext = os.path.splitext(file_path)[1].lower()
        mime_type = mime_by_ext.get(ext, 'image/jpeg')

        # Delegate to the base64-based tool.
        return describe_image(image_data, mime_type)
    except Exception as e:
        logger.error("Error reading image file: %s", e, exc_info=True)
        return f"Error reading image file: {str(e)}"
def main():
    """Entry point: log startup and run the MCP server loop."""
    logger.info("Starting MCP Image Recognition Server")
    mcp.run()


if __name__ == "__main__":
    main()