Skip to content

helpers

ipw.analysis.helpers

Dataset loading helpers for regression analysis.

load_metrics_dataset(results_dir)

Load the profiler metrics dataset stored on disk.

Source code in intelligence-per-watt/src/ipw/analysis/helpers.py
def load_metrics_dataset(results_dir: Path) -> Dataset:
    """Load the profiler metrics dataset stored on disk.

    Accepts either a plain ``Dataset`` or a ``DatasetDict`` saved at
    *results_dir*; for a ``DatasetDict`` the first split (insertion order)
    is used.

    Raises:
        RuntimeError: if the path holds no splits, an unsupported object,
            or an empty dataset.
    """

    loaded = load_from_disk(str(results_dir))

    if isinstance(loaded, DatasetDict):
        # Pick the first split out of a multi-split save, if any exists.
        dataset = next(iter(loaded.values()), None)
        if dataset is None:
            raise RuntimeError(f"No splits found in dataset at {results_dir!s}")
    else:
        dataset = loaded

    if not isinstance(dataset, Dataset):
        raise RuntimeError(
            f"Unsupported dataset object returned for {results_dir!s}: {type(dataset)!r}"
        )
    if len(dataset) == 0:
        raise RuntimeError(f"Empty dataset at {results_dir!s}")
    return dataset

resolve_model_name(dataset, requested, results_dir)

Determine which model within the dataset should be analyzed.

Source code in intelligence-per-watt/src/ipw/analysis/helpers.py
def resolve_model_name(
    dataset: Dataset, requested: str | None, results_dir: Path
) -> str:
    """Determine which model within the dataset should be analyzed.

    If *requested* is given it must be present in the dataset; otherwise
    the dataset must contain exactly one model, which is returned.

    Raises:
        RuntimeError: when the requested model is missing, no model can be
            inferred, or several models are present without a selection.
    """

    available = _collect_available_models(dataset)

    if requested:
        if requested in available:
            return requested
        if not available:
            raise RuntimeError(
                f"Model '{requested}' not found in dataset at '{results_dir}', and the dataset does not contain model metrics."
            )
        models_text = ", ".join(sorted(available))
        raise RuntimeError(
            f"Model '{requested}' not found in dataset at '{results_dir}'. Available models: {models_text}."
        )

    # Deduplicate while keeping first-seen order.
    unique_models = list(dict.fromkeys(available))
    if not unique_models:
        raise RuntimeError(
            f"Could not infer model name from dataset at '{results_dir}'. Specify a model explicitly."
        )
    if len(unique_models) > 1:
        models_text = ", ".join(sorted(unique_models))
        raise RuntimeError(
            f"Dataset at '{results_dir}' contains metrics for multiple models ({models_text}). Specify a model to choose one."
        )
    return unique_models[0]

iter_model_entries(dataset, model_name)

Yield the per-record metrics for the requested model.

Source code in intelligence-per-watt/src/ipw/analysis/helpers.py
def iter_model_entries(
    dataset: Dataset, model_name: str
) -> Iterator[Mapping[str, object]]:
    """Yield the per-record metrics for the requested model.

    Records without a mapping-valued ``model_metrics`` field, or whose
    metrics lack a mapping entry for *model_name*, are skipped silently.
    """

    for record in dataset:
        # Guard every level of the nested structure before descending.
        if not isinstance(record, Mapping):
            continue
        metrics_map = record.get("model_metrics")
        if not isinstance(metrics_map, Mapping):
            continue
        candidate = metrics_map.get(model_name)
        if isinstance(candidate, Mapping):
            yield candidate

collect_run_metadata(entries)

Collect the first non-empty system and GPU metadata structures.

Source code in intelligence-per-watt/src/ipw/analysis/helpers.py
def collect_run_metadata(
    entries: Iterable[Mapping[str, object]],
) -> tuple[dict[str, object], dict[str, object]]:
    """Collect the first non-empty system and GPU metadata structures.

    Scans *entries* in order, capturing the first non-empty mapping found
    under ``system_info`` and under ``gpu_info``. Returns a pair of
    (possibly empty) dict copies.
    """

    system_info: dict[str, object] = {}
    gpu_info: dict[str, object] = {}

    for record in entries:
        if not system_info:
            candidate = record.get("system_info")
            if isinstance(candidate, Mapping):
                system_info = dict(candidate)
        if not gpu_info:
            candidate = record.get("gpu_info")
            if isinstance(candidate, Mapping):
                gpu_info = dict(candidate)
        # Break only after processing the record that completed the pair,
        # so a generator input is not advanced one element too far.
        if system_info and gpu_info:
            break

    return system_info, gpu_info

compute_accuracy_summary(dataset, model_name)

Compute accuracy statistics for a given model within an evaluated dataset.

This helper assumes that each record's model_metrics entry for the target model may contain an evaluation mapping with an is_correct boolean.

Source code in intelligence-per-watt/src/ipw/analysis/helpers.py
def compute_accuracy_summary(
    dataset: Dataset,
    model_name: str,
) -> Mapping[str, object]:
    """
    Compute accuracy statistics for a given model within an evaluated dataset.

    This helper assumes that each record's model_metrics entry for the target
    model may contain an ``evaluation`` mapping with an ``is_correct`` boolean.
    """

    correct = 0
    incorrect = 0
    unevaluated = 0

    for record in dataset:
        # Dig out the evaluation mapping, tolerating malformed records at
        # every level of nesting; failure at any level means "unevaluated".
        evaluation = None
        if isinstance(record, Mapping):
            metrics_map = record.get("model_metrics")
            if isinstance(metrics_map, Mapping):
                entry = metrics_map.get(model_name)
                if isinstance(entry, Mapping):
                    evaluation = entry.get("evaluation")

        verdict = (
            evaluation.get("is_correct")
            if isinstance(evaluation, Mapping)
            else None
        )
        # Only literal True/False count as scored; anything else is skipped.
        if verdict is True:
            correct += 1
        elif verdict is False:
            incorrect += 1
        else:
            unevaluated += 1

    total_scored = correct + incorrect
    accuracy = correct / total_scored if total_scored else None

    return {
        "model": model_name,
        "correct": correct,
        "incorrect": incorrect,
        "unevaluated": unevaluated,
        "total_scored": total_scored,
        "accuracy": accuracy,
    }