@AnalysisRegistry.register("regression")
class RegressionAnalysis(AnalysisProvider):
    """Default analysis computing regression statistics for metrics runs."""

    analysis_id = "regression"

    def run(self, context: AnalysisContext) -> AnalysisResult:
        """Fit per-metric regressions over one model's entries and write a JSON report.

        Args:
            context: Provides ``results_dir`` (dataset root directory) and
                ``options``. Recognized option keys: ``model`` (pin a specific
                model name; otherwise resolved from the dataset) and
                ``skip_zeroes`` (drop regressions that produced no result).

        Returns:
            AnalysisResult carrying the regression payload, any zero-value
            warnings, and the path of the ``analysis/regression.json`` artifact.

        Raises:
            RuntimeError: If the dataset has no usable entries for the
                resolved model.
        """
        results_dir = context.results_dir
        options = dict(context.options)
        requested_model = options.get("model")
        skip_zeroes = bool(options.get("skip_zeroes", False))

        dataset = load_metrics_dataset(results_dir)
        active_model = resolve_model_name(dataset, requested_model, results_dir)
        entries = list(iter_model_entries(dataset, active_model))
        if not entries:
            raise RuntimeError(
                f"No usable metrics found for model '{active_model}' in dataset at '{results_dir}'."
            )

        regressions, zero_counts = create_regression_containers()
        for entry in entries:
            token_metrics = _get_mapping(entry.get("token_metrics"))
            latency_metrics = _get_mapping(entry.get("latency_metrics"))
            energy_metrics = _get_mapping(entry.get("energy_metrics"))
            power_metrics = _get_mapping(entry.get("power_metrics"))

            prompt_tokens = to_float(token_metrics.get("input"))
            completion_tokens = to_float(token_metrics.get("output"))
            total_tokens = to_float(token_metrics.get("total"))
            if total_tokens is None:
                # Fall back to prompt + completion when the dataset omits an
                # explicit total token count.
                total_tokens = derive_total_tokens(prompt_tokens, completion_tokens)

            register_regression_sample(
                regressions,
                zero_counts,
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                total_tokens=total_tokens,
                ttft_seconds=to_float(latency_metrics.get("time_to_first_token_seconds")),
                total_latency_seconds=to_float(latency_metrics.get("total_query_seconds")),
                per_query_joules=to_float(energy_metrics.get("per_query_joules")),
                per_query_watts=_extract_power_value(power_metrics),
            )
        # Every entry contributes exactly one sample, so the count equals
        # len(entries). (A former "samples_collected == 0" re-check here was
        # unreachable given the empty-entries guard above and was removed.)
        samples_collected = len(entries)

        regression_results = finalize_regressions(regressions)
        if skip_zeroes:
            regression_results = _filter_none_regressions(regression_results)
        warnings = build_zero_warnings(zero_counts, context=" in dataset")

        summary_payload = {
            "total_samples": samples_collected,
        }
        data_payload: Dict[str, Any] = {
            "regressions": dict(regression_results),
        }
        artifact_payload = {
            "analysis": self.analysis_id,
            "summary": summary_payload,
            "warnings": list(warnings),
            "data": data_payload,
        }

        # Persist the report next to the dataset. default=str keeps any
        # non-JSON-native values (e.g. Path objects) serializable; explicit
        # UTF-8 avoids locale-dependent artifact encoding.
        artifact_dir = results_dir / "analysis"
        artifact_dir.mkdir(parents=True, exist_ok=True)
        artifact_path = artifact_dir / f"{self.analysis_id}.json"
        artifact_path.write_text(
            json.dumps(artifact_payload, indent=2, default=str),
            encoding="utf-8",
        )

        return AnalysisResult(
            analysis=self.analysis_id,
            summary=summary_payload,
            data=data_payload,
            warnings=tuple(warnings),
            artifacts={"report": artifact_path},
        )