import gradio as gr

from typing import List

from langchain.chat_models import init_chat_model
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage


# Initialize the Ollama-backed chat model
ollama_model_name = "qwen2.5-coder:14b"  # Change to your preferred model

# Streaming is requested per-call via llm.stream() below, so no
# streaming flag is needed at construction time.
llm = init_chat_model(
    ollama_model_name,
    model_provider="ollama",
    temperature=0,
)
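# Note: init_chat_model with model_provider="ollama" resolves to ChatOllama
# from the langchain-ollama package, and assumes a local Ollama server with
# the model already pulled (e.g. `ollama pull qwen2.5-coder:14b`).
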
# Store conversation history as LangChain message objects
conversation_history = []


def add_message_to_history(role: str, content: str):
    """Add a message to the conversation history."""
    if role == "human":
        conversation_history.append(HumanMessage(content=content))
    elif role == "ai":
        conversation_history.append(AIMessage(content=content))
    elif role == "system":
        conversation_history.append(SystemMessage(content=content))
    return conversation_history

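# Caveat: conversation_history is module-level state, so it is shared by
# every browser session connected to this app. That is fine for local,
# single-user use; a multi-user deployment would need per-session state
# (e.g. gr.State).
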
# Initialize with a system message
add_message_to_history("system", "You are a helpful, friendly AI assistant.")

def stream_response(message: str, history: List[List[str]]):
    """Process the user message and stream the response.

    The `history` argument is Gradio's pair-list history; it is accepted
    for signature compatibility, but the model is driven by the
    module-level conversation_history instead.
    """
    # Add user message to history
    add_message_to_history("human", message)

    # Stream the reply, yielding the accumulated text so far
    response = ""
    for chunk in llm.stream(conversation_history):
        # Each chunk is an AIMessageChunk; fall back to str() just in case
        if hasattr(chunk, "content"):
            chunk_content = chunk.content
        else:
            chunk_content = str(chunk)

        response += chunk_content
        yield response

    # Add the complete AI response to history once streaming finishes
    add_message_to_history("ai", response)

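# Quick sanity check without the UI (assumes the Ollama model is available):
#
#     for partial in stream_response("Write a haiku about rivers.", []):
#         pass
#     print(partial)  # final accumulated reply
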
# Create the Gradio interface with streaming
with gr.Blocks() as demo:
    gr.Markdown("# Ollama Chatbot with Streaming")

    chatbot = gr.Chatbot(height=500)
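    # Note: gr.Chatbot defaults to the pair-list ("tuples") history format
    # used below; recent Gradio releases deprecate it in favour of
    # type="messages", which would need matching changes in user()/bot().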
    msg = gr.Textbox(placeholder="Type your message here...", container=False)
    clear = gr.Button("Clear Chat")

    def user(message, history):
        # Clear the textbox and echo the user message immediately
        return "", history + [[message, None]]

    def bot(history):
        # Process the last user message
        user_message = history[-1][0]
        history[-1][1] = ""  # Initialize the bot's response

        # Re-yield the growing history so the UI updates as tokens arrive
        for response in stream_response(user_message, history):
            history[-1][1] = response
            yield history

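    # Two-step chain: user() runs first (queue=False, so the echo is
    # instant), then bot() streams the model reply into the last entry.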
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )

    def clear_chat():
        # Reset both the UI and the LangChain history (keep the system prompt),
        # otherwise the model would still "remember" the cleared conversation
        conversation_history.clear()
        add_message_to_history("system", "You are a helpful, friendly AI assistant.")
        return None

    clear.click(clear_chat, None, chatbot, queue=False)

if __name__ == "__main__":
    # Enable queuing, which generator-based (streaming) handlers rely on,
    # then launch the Gradio interface
    demo.queue()
    demo.launch(share=False)  # Set share=True to create a public link