Skip to content

swefficiency

ipw.datasets.swefficiency

SWEfficiencyDataset

Bases: DatasetProvider

SWEfficiency benchmark dataset (swefficiency/swefficiency).

A software performance optimization benchmark in the style of SWE-bench.

Source code in intelligence-per-watt/src/ipw/datasets/swefficiency.py
@DatasetRegistry.register("swefficiency")
class SWEfficiencyDataset(DatasetProvider):
    """Provider for the SWEfficiency benchmark (swefficiency/swefficiency).

    A SWE-bench-style benchmark whose tasks are software performance
    optimizations: each instance pairs a repository state with a problem
    statement, a workload, and a ground-truth speedup patch.
    """

    dataset_id = "swefficiency"
    dataset_name = "SWEfficiency"
    evaluation_method = "swefficiency"

    _hf_path = "swefficiency/swefficiency"
    _default_split = "test"

    # Correctness on SWEfficiency is decided by running the repository's
    # test suites, not by an LLM judge -- so the judge settings stay unset.
    eval_client: str | None = None
    eval_base_url: str | None = None
    eval_model: str | None = None

    def __init__(
        self,
        *,
        split: Optional[str] = None,
        max_samples: Optional[int] = None,
    ) -> None:
        """Load and materialize the dataset records eagerly.

        Args:
            split: Hugging Face split name; falls back to the class default.
            max_samples: optional cap on the number of rows kept.
        """
        self._split = split if split else self._default_split
        self._max_samples = max_samples
        self._records: Tuple[DatasetRecord, ...] = tuple(self._build_records())

    def iter_records(self) -> Iterable[DatasetRecord]:
        """Yield the pre-built records in dataset order."""
        return iter(self._records)

    def size(self) -> int:
        """Number of records kept after conversion/filtering."""
        return len(self._records)

    # ------------------------------------------------------------------
    # Dataset loading
    # ------------------------------------------------------------------

    def _build_records(self) -> List[DatasetRecord]:
        """Convert every raw row, dropping rows that fail conversion."""
        converted = (self._convert_row(raw) for raw in self._load_raw_rows())
        return [record for record in converted if record is not None]

    def _load_raw_rows(self) -> Sequence[MutableMapping[str, object]]:
        """Fetch raw rows from the HF hub and normalize each to a mapping."""
        dataset = load_dataset(self._hf_path, split=self._split)
        # Newer `datasets` objects expose to_list(); otherwise iterate.
        raw_rows = dataset.to_list() if hasattr(dataset, "to_list") else list(dataset)
        if self._max_samples is not None:
            raw_rows = raw_rows[: self._max_samples]
        # Guarantee every row is a MutableMapping for downstream .get() use.
        return [
            row if isinstance(row, MutableMapping) else dict(row)
            for row in raw_rows
        ]

    def _convert_row(self, raw: MutableMapping[str, object]) -> Optional[DatasetRecord]:
        """Turn one raw row into a DatasetRecord, or None if unusable."""
        instance_id = str(raw.get("instance_id") or "")
        repo = str(raw.get("repo") or "")
        problem_statement = str(raw.get("problem_statement") or "").strip()
        workload = str(raw.get("workload") or "")
        # Prefer "speedup"; fall back to "expected_speedup", then 1.0.
        raw_speedup = raw.get("speedup", raw.get("expected_speedup", 1.0))
        speedup = float(raw_speedup or 1.0)

        # Rows without an id or a problem statement cannot be evaluated.
        if not (instance_id and problem_statement):
            return None

        problem = _DEFAULT_INPUT_PROMPT.format(
            repo=repo,
            problem_statement=problem_statement,
            workload=workload,
            expected_speedup=speedup,
        )

        # The ground-truth patch doubles as the reference "answer".
        gold_patch = str(raw.get("patch") or "")

        covering_tests = _parse_test_list(
            raw.get("covering_tests", raw.get("COVERING_TESTS", []))
        )
        pass_to_pass = _parse_test_list(
            raw.get("pass_to_pass", raw.get("PASS_TO_PASS", []))
        )

        # Everything the evaluation harness needs to rebuild + run tests.
        metadata: MutableMapping[str, object] = {
            "dataset_name": self.dataset_name,
            "instance_id": instance_id,
            "repo": repo,
            "base_commit": raw.get("base_commit"),
            "test_patch": raw.get("test_patch"),
            "test_cmd": raw.get("test_cmd"),
            "rebuild_cmd": raw.get("rebuild_cmd"),
            "image_name": raw.get("image_name"),
            "speedup": speedup,
            "covering_tests": covering_tests,
            "pass_to_pass": pass_to_pass,
        }

        return DatasetRecord(
            problem=problem,
            answer=gold_patch,
            subject=repo,
            dataset_metadata=metadata,
        )