import gradio as gr
from typing import List
from langchain.chat_models import init_chat_model
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage

# Initialize the Ollama chat model
ollama_model_name = "qwen2.5-coder:14b"  # Change to your preferred model
llm = init_chat_model(
    ollama_model_name,
    model_provider="ollama",
    temperature=0,
)  # Streaming is available via llm.stream(); no extra flag is needed

# System message used to (re)initialize the conversation
SYSTEM_PROMPT = "You are a helpful, friendly AI assistant."

# Store conversation history as LangChain message objects
conversation_history = []


def add_message_to_history(role: str, content: str):
    """Add a message to the conversation history."""
    if role == "human":
        conversation_history.append(HumanMessage(content=content))
    elif role == "ai":
        conversation_history.append(AIMessage(content=content))
    elif role == "system":
        conversation_history.append(SystemMessage(content=content))
    return conversation_history


# Initialize with the system message
add_message_to_history("system", SYSTEM_PROMPT)


def stream_response(message: str, history: List[List[str]]):
    """Process the user message and stream the response."""
    # Add the user message to history
    add_message_to_history("human", message)

    # Stream the model's reply, yielding the accumulated text so far
    response = ""
    for chunk in llm.stream(conversation_history):
        # Each chunk is an AIMessageChunk; extract its text content
        chunk_content = chunk.content if hasattr(chunk, "content") else str(chunk)
        response += chunk_content
        yield response

    # Add the complete AI response to history
    add_message_to_history("ai", response)


# Create the Gradio interface with streaming
with gr.Blocks() as demo:
    gr.Markdown("# Ollama Chatbot with Streaming")
    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(placeholder="Type your message here...", container=False)
    clear = gr.Button("Clear Chat")

    def user(message, history):
        # Echo the user message into the chat immediately
        return "", history + [[message, None]]

    def bot(history):
        # Stream the reply for the last user message
        user_message = history[-1][0]
        history[-1][1] = ""  # Initialize the bot's response
        for response in stream_response(user_message, history):
            history[-1][1] = response
            yield history

    def clear_chat():
        # Reset both the UI and the model-side history, keeping the system prompt
        conversation_history.clear()
        add_message_to_history("system", SYSTEM_PROMPT)
        return None

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear.click(clear_chat, None, chatbot, queue=False)

if __name__ == "__main__":
    # Launch the Gradio interface
    demo.queue()
    demo.launch(share=False)  # Set share=True to create a public link
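
# --- Setup notes (assumptions about the environment, not part of the app logic) ---
# This sketch assumes a local Ollama server with the target model already pulled, e.g.:
#   ollama pull qwen2.5-coder:14b
# and the Python dependencies installed, e.g.:
#   pip install gradio langchain langchain-ollama
# init_chat_model(..., model_provider="ollama") resolves to ChatOllama from the
# langchain-ollama package, whose .stream() yields AIMessageChunk objects.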