regression

ipw.analysis.regression

Regression utilities and default analysis implementation.

RegressionAnalysis

Bases: AnalysisProvider

Default analysis computing regression statistics for metrics runs.

Source code in intelligence-per-watt/src/ipw/analysis/regression.py
@AnalysisRegistry.register("regression")
class RegressionAnalysis(AnalysisProvider):
    """Default analysis computing regression statistics for metrics runs."""

    analysis_id = "regression"

    def run(self, context: AnalysisContext) -> AnalysisResult:
        results_dir = context.results_dir
        options = dict(context.options)
        requested_model = options.get("model")
        skip_zeroes = bool(options.get("skip_zeroes", False))

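        # Load the metrics dataset from disk and resolve which model to analyse.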
        dataset = load_metrics_dataset(results_dir)
        active_model = resolve_model_name(dataset, requested_model, results_dir)

        entries = list(iter_model_entries(dataset, active_model))
        if not entries:
            raise RuntimeError(
                f"No usable metrics found for model '{active_model}' in dataset at '{results_dir}'."
            )

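        # Accumulators for each regression, plus counters for zero-valued readings.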
        regressions, zero_counts = create_regression_containers()

        samples_collected = 0
        for entry in entries:
            token_metrics = _get_mapping(entry.get("token_metrics"))
            latency_metrics = _get_mapping(entry.get("latency_metrics"))
            energy_metrics = _get_mapping(entry.get("energy_metrics"))
            power_metrics = _get_mapping(entry.get("power_metrics"))

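            # Token counts may be missing; fall back to deriving the total from prompt + completion.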
            prompt_tokens = to_float(token_metrics.get("input"))
            completion_tokens = to_float(token_metrics.get("output"))
            total_tokens = to_float(token_metrics.get("total"))
            if total_tokens is None:
                total_tokens = derive_total_tokens(prompt_tokens, completion_tokens)

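            # Per-query latency: time to first token and total wall-clock seconds.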
            ttft_value = to_float(latency_metrics.get("time_to_first_token_seconds"))
            total_latency_value = to_float(latency_metrics.get("total_query_seconds"))

            energy_value = to_float(energy_metrics.get("per_query_joules"))
            power_value = _extract_power_value(power_metrics)

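            # Fold this entry's readings into every regression accumulator.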
            register_regression_sample(
                regressions,
                zero_counts,
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                total_tokens=total_tokens,
                ttft_seconds=ttft_value,
                total_latency_seconds=total_latency_value,
                per_query_joules=energy_value,
                per_query_watts=power_value,
            )
            samples_collected += 1

        if samples_collected == 0:
            raise RuntimeError(
                f"No usable metrics found for model '{active_model}' in dataset at '{results_dir}'."
            )

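        # Fit the regressions; with skip_zeroes, drop pairs whose fit came back as None,
        # then build warnings for metrics that were zero throughout the dataset.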
        regression_results = finalize_regressions(regressions)
        if skip_zeroes:
            regression_results = _filter_none_regressions(regression_results)

        warnings = build_zero_warnings(zero_counts, context=" in dataset")

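        # Payloads shared by the JSON artifact and the returned AnalysisResult.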
        summary_payload = {
            "total_samples": samples_collected,
        }
        data_payload: Dict[str, Any] = {
            "regressions": dict(regression_results),
        }

        artifact_payload = {
            "analysis": self.analysis_id,
            "summary": summary_payload,
            "warnings": list(warnings),
            "data": data_payload,
        }

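        # Persist the full payload as <results_dir>/analysis/regression.json.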
        artifact_dir = results_dir / "analysis"
        artifact_dir.mkdir(parents=True, exist_ok=True)
        artifact_path = artifact_dir / f"{self.analysis_id}.json"
        artifact_path.write_text(json.dumps(artifact_payload, indent=2, default=str))

        return AnalysisResult(
            analysis=self.analysis_id,
            summary=summary_payload,
            data=data_payload,
            warnings=tuple(warnings),
            artifacts={"report": artifact_path},
        )
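
Usage

The run method honours two options on the context: model, which selects the model whose entries are analysed (auto-resolved when omitted), and skip_zeroes, which drops regression pairs that produced no fit. The sketch below is illustrative only: the import path and constructor for AnalysisContext are assumptions, since this page only shows that the context exposes results_dir and options.

from pathlib import Path

from ipw.analysis.regression import RegressionAnalysis
# Hypothetical import: AnalysisContext's real module is not shown on this page.
from ipw.analysis.context import AnalysisContext

# Hypothetical constructor: assumed to accept the two attributes that run() reads.
context = AnalysisContext(
    results_dir=Path("results/run-001"),  # directory holding the metrics dataset
    options={"model": "my-model", "skip_zeroes": True},
)

result = RegressionAnalysis().run(context)
print(result.summary["total_samples"])  # entries folded into the fits
print(result.artifacts["report"])       # <results_dir>/analysis/regression.json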