class TestGAIAHandler:
    """Test GAIAHandler with mocked LLM client.

    Every test builds its handler around a ``MagicMock`` client whose
    ``chat`` attribute is pre-configured, so no real LLM is contacted.
    """

    def _make_handler(
        self,
        chat_return: str = "",
        *,
        chat_error: "BaseException | None" = None,
    ) -> GAIAHandler:
        """Build a ``GAIAHandler`` backed by a mocked client.

        Args:
            chat_return: Value returned by the mocked ``client.chat``.
            chat_error: If given, ``client.chat`` raises this exception
                instead of returning ``chat_return``.

        Returns:
            A handler constructed with ``client=<MagicMock>``.
        """
        mock_client = MagicMock()
        mock_client.chat.return_value = chat_return
        if chat_error is not None:
            # A mock's side_effect takes precedence over its return_value.
            mock_client.chat.side_effect = chat_error
        return GAIAHandler(client=mock_client)

    def test_exact_match_returns_true(self) -> None:
        """An answer equal to the reference is correct with match_type "exact"."""
        handler = self._make_handler()
        is_correct, meta = handler.evaluate(
            problem="Q",
            reference="42",
            model_answer="42",
            metadata={},
        )
        assert is_correct is True
        assert meta["match_type"] == "exact"

    def test_empty_response_returns_false(self) -> None:
        """An empty model answer is incorrect with reason "empty_response"."""
        handler = self._make_handler()
        is_correct, meta = handler.evaluate(
            problem="Q",
            reference="42",
            model_answer="",
            metadata={},
        )
        assert is_correct is False
        assert meta["reason"] == "empty_response"

    def test_no_ground_truth_returns_none(self) -> None:
        """An empty reference yields ``None`` with reason "no_ground_truth"."""
        handler = self._make_handler()
        is_correct, meta = handler.evaluate(
            problem="Q",
            reference="",
            model_answer="42",
            metadata={},
        )
        assert is_correct is None
        assert meta["reason"] == "no_ground_truth"

    def test_llm_fallback_correct(self) -> None:
        """A "correct: yes" chat reply gives True with match_type "llm_fallback"."""
        handler = self._make_handler(
            chat_return="extracted_final_answer: Paris\nreasoning: matches\ncorrect: yes"
        )
        is_correct, meta = handler.evaluate(
            problem="Capital of France?",
            reference="Paris",
            model_answer="The capital is Paris, France",
            metadata={},
        )
        assert is_correct is True
        assert meta["match_type"] == "llm_fallback"

    def test_llm_fallback_incorrect(self) -> None:
        """A "correct: no" chat reply gives a False verdict."""
        handler = self._make_handler(
            chat_return="extracted_final_answer: London\nreasoning: wrong\ncorrect: no"
        )
        # Only the verdict is checked here; the metadata is intentionally ignored.
        is_correct, _ = handler.evaluate(
            problem="Capital of France?",
            reference="Paris",
            model_answer="London",
            metadata={},
        )
        assert is_correct is False

    def test_llm_fallback_error(self) -> None:
        """A ``RuntimeError`` from ``client.chat`` gives False plus an "error" key."""
        handler = self._make_handler(chat_error=RuntimeError("API error"))
        is_correct, meta = handler.evaluate(
            problem="Q",
            reference="A",
            model_answer="wrong answer",
            metadata={},
        )
        assert is_correct is False
        assert "error" in meta