import gradio as gr

from typing import List

from langchain.chat_models import init_chat_model
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage


# Initialize the Ollama-backed chat model
ollama_model_name = "qwen2.5-coder:14b"  # Change to your preferred model

# Streaming is requested per-call via llm.stream() below, so no
# streaming flag is needed at construction time.
llm = init_chat_model(
    ollama_model_name,
    model_provider="ollama",
    temperature=0,
)
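# Note: init_chat_model with model_provider="ollama" resolves to ChatOllama
# from the langchain-ollama package, and assumes a local Ollama server with
# the model already pulled (e.g. `ollama pull qwen2.5-coder:14b`).
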
# Store conversation history as LangChain message objects
conversation_history = []


def add_message_to_history(role: str, content: str):
    """Add a message to the conversation history."""
    if role == "human":
        conversation_history.append(HumanMessage(content=content))
    elif role == "ai":
        conversation_history.append(AIMessage(content=content))
    elif role == "system":
        conversation_history.append(SystemMessage(content=content))
    return conversation_history

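# Caveat: conversation_history is module-level state, so it is shared by
# every browser session connected to this app. That is fine for local,
# single-user use; a multi-user deployment would need per-session state
# (e.g. gr.State).
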
# Initialize with a system message
add_message_to_history("system", "You are a helpful, friendly AI assistant.")

def stream_response(message: str, history: List[List[str]]):
    """Process the user message and stream the response.

    The `history` argument is Gradio's pair-list history; it is accepted
    for signature compatibility, but the model is driven by the
    module-level conversation_history instead.
    """
    # Add user message to history
    add_message_to_history("human", message)

    # Stream the reply, yielding the accumulated text so far
    response = ""
    for chunk in llm.stream(conversation_history):
        # Each chunk is an AIMessageChunk; fall back to str() just in case
        if hasattr(chunk, "content"):
            chunk_content = chunk.content
        else:
            chunk_content = str(chunk)

        response += chunk_content
        yield response

    # Add the complete AI response to history once streaming finishes
    add_message_to_history("ai", response)

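# Quick sanity check without the UI (assumes the Ollama model is available):
#
#     for partial in stream_response("Write a haiku about rivers.", []):
#         pass
#     print(partial)  # final accumulated reply
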
# Create the Gradio interface with streaming
with gr.Blocks() as demo:
    gr.Markdown("# Ollama Chatbot with Streaming")

    chatbot = gr.Chatbot(height=500)
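    # Note: gr.Chatbot defaults to the pair-list ("tuples") history format
    # used below; recent Gradio releases deprecate it in favour of
    # type="messages", which would need matching changes in user()/bot().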
    msg = gr.Textbox(placeholder="Type your message here...", container=False)
    clear = gr.Button("Clear Chat")

    def user(message, history):
        # Clear the textbox and echo the user message immediately
        return "", history + [[message, None]]

    def bot(history):
        # Process the last user message
        user_message = history[-1][0]
        history[-1][1] = ""  # Initialize the bot's response

        # Re-yield the growing history so the UI updates as tokens arrive
        for response in stream_response(user_message, history):
            history[-1][1] = response
            yield history

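    # Two-step chain: user() runs first (queue=False, so the echo is
    # instant), then bot() streams the model reply into the last entry.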
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )

    def clear_chat():
        # Reset both the UI and the LangChain history (keep the system prompt),
        # otherwise the model would still "remember" the cleared conversation
        conversation_history.clear()
        add_message_to_history("system", "You are a helpful, friendly AI assistant.")
        return None

    clear.click(clear_chat, None, chatbot, queue=False)

if __name__ == "__main__":
    # Enable queuing, which generator-based (streaming) handlers rely on,
    # then launch the Gradio interface
    demo.queue()
    demo.launch(share=False)  # Set share=True to create a public link