Skip to content

ollama_server

ipw.agents.mcp.ollama_server

Ollama MCP server for local models.

OllamaMCPServer

Bases: BaseMCPServer

MCP server for local models via Ollama.

Supports any model available in Ollama (Llama, Qwen, DeepSeek, etc.)

Example

server = OllamaMCPServer( model_name="llama3.2:1b", base_url="http://localhost:11434" )

result = server.execute("What is 2+2?")
print(result.content)  # "4"
print(result.cost_usd)  # 0.0 (local model, no cost)

Source code in intelligence-per-watt/src/ipw/agents/mcp/ollama_server.py
class OllamaMCPServer(BaseMCPServer):
    """MCP server for local models via Ollama.

    Supports any model available in Ollama (Llama, Qwen, DeepSeek, etc.)

    Example:
        server = OllamaMCPServer(
            model_name="llama3.2:1b",
            base_url="http://localhost:11434"
        )

        result = server.execute("What is 2+2?")
        print(result.content)  # "4"
        print(result.cost_usd)  # 0.0 (local model, no cost)
    """

    # Host used when the caller does not supply a base_url.
    DEFAULT_BASE_URL = "http://127.0.0.1:11434"

    def __init__(
        self,
        model_name: str,
        base_url: Optional[str] = None,
        telemetry_collector: Optional[Any] = None,
        event_recorder: Optional[Any] = None,
        **ollama_params: Any,
    ):
        """Initialize Ollama MCP server.

        Args:
            model_name: Ollama model name (e.g., "llama3.2:1b", "qwen2.5:0.5b")
            base_url: Ollama server URL (default: http://127.0.0.1:11434)
            telemetry_collector: Energy monitor collector
            event_recorder: EventRecorder for per-action tracking
            **ollama_params: Additional Ollama parameters (temperature, etc.)

        Raises:
            ImportError: If the optional ``ollama`` package is not installed.
        """
        super().__init__(
            name=f"ollama:{model_name}",
            telemetry_collector=telemetry_collector,
            event_recorder=event_recorder,
        )

        self.model_name = model_name
        self.ollama_params = ollama_params

        # Lazy import: ollama is optional
        try:
            from ollama import Client
        except ImportError as exc:
            # Chain explicitly so the original missing-module traceback
            # survives alongside the install hint.
            raise ImportError(
                "ollama package is required for OllamaMCPServer. "
                "Install with: pip install ollama"
            ) from exc

        # Initialize Ollama client
        host = _normalize_base_url(base_url or self.DEFAULT_BASE_URL)
        self._client = Client(host=host)

    def _execute_impl(self, prompt: str, **params: Any) -> MCPToolResult:
        """Execute model inference via Ollama and consume the token stream.

        Args:
            prompt: Prompt text sent to the model.
            **params: Per-request Ollama parameters; these override
                self.ollama_params on key conflicts.

        Returns:
            MCPToolResult with the generated text, token usage, zero cost
            (local model), and time-to-first-token.

        Raises:
            RuntimeError: If the Ollama API reports an error.
        """
        from ollama import ResponseError

        # Merge default params with request params (request wins);
        # model/prompt/stream are always forced afterwards.
        payload = {**self.ollama_params, **params}
        payload["model"] = self.model_name
        payload["prompt"] = prompt
        payload["stream"] = True

        # Call Ollama API
        start = time.perf_counter()
        try:
            stream = self._client.generate(**payload)
        except ResponseError as exc:
            raise RuntimeError(f"Ollama error for {self.model_name}: {exc}") from exc

        # Consume stream and collect response
        content_chunks: list[str] = []
        prompt_tokens = 0
        completion_tokens = 0
        ttft_ms: Optional[float] = None

        for chunk in stream:
            text = getattr(chunk, "response", None)
            if text:
                if ttft_ms is None:
                    # Time-to-first-token: measured at the first non-empty chunk.
                    ttft_ms = (time.perf_counter() - start) * 1000
                content_chunks.append(text)

            # Final chunk contains token counts
            if getattr(chunk, "done", False):
                prompt_tokens = int(chunk.prompt_eval_count or prompt_tokens)
                completion_tokens = int(chunk.eval_count or completion_tokens)

        content = "".join(content_chunks)

        return MCPToolResult(
            content=content,
            usage={
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
            },
            cost_usd=0.0,  # Local model, no API cost
            # Identity check, not truthiness: a measured TTFT of 0.0 ms must
            # not be collapsed to None.
            ttft_seconds=(ttft_ms / 1000.0) if ttft_ms is not None else None,
            metadata={"model": self.model_name, "backend": "ollama"},
        )

    def health_check(self) -> bool:
        """Check if Ollama server is available and model is loaded.

        Returns:
            True if the server responds and any listed model name contains
            self.model_name as a substring; False on any error.
        """
        try:
            models = self._client.list()
            available_models = [str(m.model) for m in models.models]
            # Substring match so e.g. "llama3.2" matches "llama3.2:1b".
            return any(self.model_name in m for m in available_models)
        except Exception:
            # Best-effort probe: any failure simply means "unhealthy".
            return False

    def list_available_models(self) -> list[str]:
        """List all models available in Ollama.

        Returns:
            Model name strings as reported by the Ollama server.

        Raises:
            RuntimeError: If the Ollama API call fails.
        """
        from ollama import ResponseError
        try:
            response = self._client.list()
            return [str(model.model) for model in response.models]
        except ResponseError as exc:
            raise RuntimeError(f"Failed to list Ollama models: {exc}") from exc
__init__(model_name, base_url=None, telemetry_collector=None, event_recorder=None, **ollama_params)

Initialize Ollama MCP server.

Parameters:

Name Type Description Default
model_name str

Ollama model name (e.g., "llama3.2:1b", "qwen2.5:0.5b")

required
base_url Optional[str]

Ollama server URL (default: http://127.0.0.1:11434)

None
telemetry_collector Optional[Any]

Energy monitor collector

None
event_recorder Optional[Any]

EventRecorder for per-action tracking

None
**ollama_params Any

Additional Ollama parameters (temperature, etc.)

{}
Source code in intelligence-per-watt/src/ipw/agents/mcp/ollama_server.py
def __init__(
    self,
    model_name: str,
    base_url: Optional[str] = None,
    telemetry_collector: Optional[Any] = None,
    event_recorder: Optional[Any] = None,
    **ollama_params: Any,
):
    """Initialize Ollama MCP server.

    Args:
        model_name: Ollama model name (e.g., "llama3.2:1b", "qwen2.5:0.5b")
        base_url: Ollama server URL (default: http://127.0.0.1:11434)
        telemetry_collector: Energy monitor collector
        event_recorder: EventRecorder for per-action tracking
        **ollama_params: Additional Ollama parameters (temperature, etc.)

    Raises:
        ImportError: If the optional ``ollama`` package is not installed.
    """
    super().__init__(
        name=f"ollama:{model_name}",
        telemetry_collector=telemetry_collector,
        event_recorder=event_recorder,
    )

    self.model_name = model_name
    self.ollama_params = ollama_params

    # Lazy import: ollama is optional
    try:
        from ollama import Client
    except ImportError as exc:
        # Chain explicitly so the original missing-module traceback
        # survives alongside the install hint.
        raise ImportError(
            "ollama package is required for OllamaMCPServer. "
            "Install with: pip install ollama"
        ) from exc

    # Initialize Ollama client
    host = _normalize_base_url(base_url or self.DEFAULT_BASE_URL)
    self._client = Client(host=host)

health_check()

Check if Ollama server is available and model is loaded.

Source code in intelligence-per-watt/src/ipw/agents/mcp/ollama_server.py
def health_check(self) -> bool:
    """Report whether the Ollama server responds and knows this model.

    Returns:
        True when the server answers and some listed model name contains
        ``self.model_name`` as a substring; False on any failure.
    """
    try:
        listing = self._client.list()
        names = [str(entry.model) for entry in listing.models]
        # Substring match so e.g. "llama3.2" also matches "llama3.2:1b".
        return any(self.model_name in name for name in names)
    except Exception:
        # Best-effort probe: any error (server down, bad payload) is "unhealthy".
        return False

list_available_models()

List all models available in Ollama.

Source code in intelligence-per-watt/src/ipw/agents/mcp/ollama_server.py
def list_available_models(self) -> list[str]:
    """Return the names of every model the Ollama server reports.

    Raises:
        RuntimeError: If the Ollama API call fails.
    """
    from ollama import ResponseError

    try:
        # Query the server and normalize every entry to a plain string.
        names = [str(entry.model) for entry in self._client.list().models]
    except ResponseError as exc:
        raise RuntimeError(f"Failed to list Ollama models: {exc}") from exc
    return names