Skip to content

Versioning API

swebench.versioning

constants

MAP_REPO_TO_VERSION_PATHS module-attribute
MAP_REPO_TO_VERSION_PATHS = {'dbt-labs/dbt-core': ['core/dbt/version.py', 'core/dbt/__init__.py'], 'django/django': ['django/__init__.py'], 'huggingface/transformers': ['src/transformers/__init__.py'], 'marshmallow-code/marshmallow': ['src/marshmallow/__init__.py'], 'mwaskom/seaborn': ['seaborn/__init__.py'], 'pallets/flask': ['src/flask/__init__.py', 'flask/__init__.py'], 'psf/requests': ['requests/__version__.py', 'requests/__init__.py'], 'pyca/cryptography': ['src/cryptography/__about__.py', 'src/cryptography/__init__.py'], 'pylint-dev/astroid': ['astroid/__pkginfo__.py', 'astroid/__init__.py'], 'pylint-dev/pylint': ['pylint/__pkginfo__.py', 'pylint/__init__.py'], 'pytest-dev/pytest': ['src/_pytest/_version.py', '_pytest/_version.py'], 'pyvista/pyvista': ['pyvista/_version.py', 'pyvista/__init__.py'], 'Qiskit/qiskit': ['qiskit/VERSION.txt'], 'scikit-learn/scikit-learn': ['sklearn/__init__.py'], 'sphinx-doc/sphinx': ['sphinx/__init__.py'], 'sympy/sympy': ['sympy/release.py', 'sympy/__init__.py']}
MAP_REPO_TO_VERSION_PATTERNS module-attribute
MAP_REPO_TO_VERSION_PATTERNS = {k: ['__version__ = [\\\'"](.*)[\\\'"]', 'VERSION = \\((.*)\\)'] for k in ['dbt-labs/dbt-core', 'django/django', 'huggingface/transformers', 'marshmallow-code/marshmallow', 'mwaskom/seaborn', 'pallets/flask', 'psf/requests', 'pyca/cryptography', 'pylint-dev/astroid', 'pylint-dev/pylint', 'scikit-learn/scikit-learn', 'sphinx-doc/sphinx', 'sympy/sympy']}
SWE_BENCH_URL_RAW module-attribute
SWE_BENCH_URL_RAW = 'https://raw.githubusercontent.com/'

get_versions

logger module-attribute
logger = getLogger(__name__)
INSTALL_CMD module-attribute
INSTALL_CMD = {'pytest-dev/pytest': 'pip install -e .', 'matplotlib/matplotlib': 'python -m pip install -e .', 'pydata/xarray': 'pip install -e .'}
parser module-attribute
parser = ArgumentParser()
args module-attribute
args = parse_args()
get_version
get_version(instance, is_build=False, path_repo=None)

Function for looking up the version of a task instance.

If is_build is True, then the version is looked up by 1. building the repo at the instance's base commit, 2. activating the conda environment, and 3. looking for the version according to a predefined list of paths.

Otherwise, the version is looked up by searching GitHub at the instance's base commit for the version according to a predefined list of paths.

Parameters:

Name Type Description Default
instance dict

Instance to find version for

required
is_build bool

Whether to build the repo and look for the version

False
path_repo str

Path to repo to build

None

Returns: str: Version text, if found

Source code in swebench/versioning/get_versions.py
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
def get_version(instance, is_build=False, path_repo=None):
    """
    Function for looking up the version of a task instance.

    If is_build is True, then the version is looked up by 1. building the repo
    at the instance's base commit, 2. activating the conda environment, and 3.
    looking for the version according to a predefined list of paths.

    Otherwise, the version is looked up by searching GitHub at the instance's
    base commit for the version according to a predefined list of paths.

    Args:
        instance (dict): Instance to find version for
        is_build (bool): Whether to build the repo and look for the version
        path_repo (str): Path to repo to build
    Returns:
        str: Version text, if found
    """
    # Keep only the major.minor portion of a version string, e.g. "1.2.3" -> "1.2"
    keep_major_minor = lambda x, sep: ".".join(x.strip().split(sep)[:2])
    paths_to_version = MAP_REPO_TO_VERSION_PATHS[instance["repo"]]
    version = None
    for path_to_version in paths_to_version:
        init_text = None
        if is_build and path_repo is not None:
            # Read the version file from the locally built repo
            version_path_abs = os.path.join(path_repo, path_to_version)
            if os.path.exists(version_path_abs):
                logger.info(f"Found version file at {path_to_version}")
                # Fix: open the absolute path whose existence was just checked,
                # not the repo-relative path (which only resolves correctly when
                # the current working directory happens to be the repo root).
                with open(version_path_abs) as f:
                    init_text = f.read()
        else:
            # Fetch the version file from raw.githubusercontent.com at the base commit
            url = os.path.join(
                SWE_BENCH_URL_RAW,
                instance["repo"],
                instance["base_commit"],
                path_to_version,
            )
            init_text = requests.get(url).text
        version = _find_version_in_text(init_text, instance)
        if version is not None:
            if "." in version:
                version = keep_major_minor(version, ".")
            if "," in version:
                version = keep_major_minor(version, ",")
            # Strip any remaining non-numeric characters (quotes, "rc1", etc.)
            version = re.sub(r"[^0-9\.]", "", version)
            return version
    return version
map_version_to_task_instances
map_version_to_task_instances(task_instances: list) -> dict

Create a map of version key to list of task instances

Parameters:

Name Type Description Default
task_instances list

List of task instances

required

Returns: dict: Map of version key to list of task instances

Source code in swebench/versioning/get_versions.py
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
def map_version_to_task_instances(task_instances: list) -> dict:
    """
    Create a map of version key to list of task instances

    If the instances already carry a "version" field (checked on the first
    instance), that field is used directly; otherwise each instance's version
    is looked up via get_version.

    Args:
        task_instances (list): List of task instances
    Returns:
        dict: Map of version key to list of task instances
    """
    grouped = {}
    # Presence of "version" on the first instance decides the lookup strategy
    have_versions = "version" in task_instances[0]
    for task in task_instances:
        key = task["version"] if have_versions else get_version(task)
        grouped.setdefault(key, []).append(task)
    return grouped
get_versions_from_build
get_versions_from_build(data: dict)

Logic for looking up versions by building the repo at the instance's base commit and looking for the version according to repo-specific paths.

Parameters:

Name Type Description Default
data dict

Dictionary of data for building a repo for any task instance in a given list.

required
Source code in swebench/versioning/get_versions.py
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
def get_versions_from_build(data: dict):
    """
    Logic for looking up versions by building the repo at the instance's base
    commit and looking for the version according to repo-specific paths.

    Side effects: temporarily changes the working directory to the repo
    testbed, resets/checks out the git repo once per instance, runs the
    repo's install command inside the given conda environment, and writes
    the annotated task list as JSON to ``save_path``.

    Args:
        data (dict): Dictionary of data for building a repo for any task instance
            in a given list. Expected keys: "data_tasks", "path_repo",
            "conda_env", "path_conda", "save_path".
    """
    data_tasks, path_repo, conda_env, path_conda, save_path = (
        data["data_tasks"],
        data["path_repo"],
        data["conda_env"],
        data["path_conda"],
        data["save_path"],
    )
    # Activate conda environment and set installation command
    cmd_activate = f"source {os.path.join(path_conda, 'bin/activate')}"
    cmd_source = f"source {os.path.join(path_conda, 'etc/profile.d/conda.sh')}"
    # Install command is repo-specific; all tasks are assumed to share one repo
    cmd_install = INSTALL_CMD[data_tasks[0]["repo"]]

    # Change directory to repo testbed
    cwd = os.getcwd()
    os.chdir(path_repo)

    # Iterate the task list in reverse order
    for instance in data_tasks[::-1]:
        # Reset repo to base commit
        subprocess.run(
            "git restore .", check=True, shell=True, stdout=subprocess.DEVNULL
        )
        subprocess.run(
            "git reset HEAD .", check=True, shell=True, stdout=subprocess.DEVNULL
        )
        subprocess.run(
            "git clean -fd", shell=True, check=True, stdout=subprocess.DEVNULL
        )
        # Deliberately no check=True: a failed checkout is handled via the
        # returncode test below so remaining instances can still be processed
        out_check = subprocess.run(
            f"git -c advice.detachedHead=false checkout {instance['base_commit']}",
            shell=True,
            stdout=subprocess.DEVNULL,
        )
        if out_check.returncode != 0:
            logger.error(f"[{instance['instance_id']}] Checkout failed")
            continue

        # Run installation command in repo
        out_install = subprocess.run(
            f"{cmd_source}; {cmd_activate} {conda_env}; {cmd_install}",
            shell=True,
            stdout=subprocess.DEVNULL,
        )
        if out_install.returncode != 0:
            logger.error(f"[{instance['instance_id']}] Installation failed")
            continue

        # Look up version according to repo-specific paths
        version = get_version(instance, is_build=True, path_repo=path_repo)
        # NOTE(review): version may be None here and is stored as-is
        instance["version"] = version
        logger.info(f"For instance {instance['instance_id']}, version is {version}")

    # Save results
    with open(save_path, "w") as f:
        json.dump(data_tasks, fp=f)
    os.chdir(cwd)
get_versions_from_web
get_versions_from_web(data: dict)

Logic for looking up versions by searching GitHub at the instance's base commit and looking for the version according to repo-specific paths.

Parameters:

Name Type Description Default
data dict

Dictionary of data for searching GitHub for any task instance in a given list.

required
Source code in swebench/versioning/get_versions.py
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
def get_versions_from_web(data: dict):
    """
    Logic for looking up versions by searching GitHub at the instance's base
    commit and looking for the version according to repo-specific paths.

    Args:
        data (dict): Dictionary of data for searching GitHub for any task instance
            in a given list.
    """
    task_list = data["data_tasks"]
    out_path = data["save_path"]
    missing = data["not_found_list"]
    for task in task_list:
        found = get_version(task)
        if found is None:
            # Record misses only when a shared "not found" list was provided
            if missing is not None:
                logger.info(f"[{task['instance_id']}]: version not found")
                missing.append(task)
            continue
        task["version"] = found
        logger.info(f"For instance {task['instance_id']}, version is {found}")
    # Persist the (possibly annotated) task list for later merging
    with open(out_path, "w") as f:
        json.dump(task_list, fp=f)
merge_results
merge_results(instances_path: str, repo_prefix: str, output_dir: str = None) -> int

Helper function for merging JSON result files generated from multiple threads.

Parameters:

Name Type Description Default
instances_path str

Path to original task instances without versions

required
repo_prefix str

Prefix of result files (repo name)

required
output_dir str

Path to save merged results to

None

Returns: int: Number of instances in merged results

Source code in swebench/versioning/get_versions.py
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
def merge_results(instances_path: str, repo_prefix: str, output_dir: str = None) -> int:
    """
    Helper function for merging JSON result files generated from multiple threads.

    Globs `{repo_prefix}_versions_*.json` shard files in the current working
    directory, concatenates their contents, deletes each shard, and writes the
    merged list next to the current directory (or into output_dir).

    Args:
        instances_path (str): Path to original task instances without versions
        repo_prefix (str): Prefix of result files (repo name)
        output_dir (str): Path to save merged results to
    Returns:
        int: Number of instances in merged results
    """
    # Merge values from result JSON files into a single list
    merged = []
    for task_with_version_path in glob.glob(f"{repo_prefix}_versions_*.json"):
        with open(task_with_version_path) as f:
            task_with_version = json.load(f)
            merged.extend(task_with_version)
        # Remove each per-thread shard once its contents have been merged
        os.remove(task_with_version_path)

    # Save merged results to original task instances file's path with `_versions` suffix.
    # Use os.path.basename instead of splitting on "/" so the name is derived
    # portably regardless of platform path separators.
    old_path_file = os.path.basename(instances_path)
    # Everything after the first "." is treated as the extension, so
    # multi-part extensions such as ".jsonl.all" are stripped entirely.
    instances_path_new = f"{old_path_file.split('.')[0]}_versions.json"
    if output_dir is not None:
        instances_path_new = os.path.join(output_dir, instances_path_new)
    with open(f"{instances_path_new}", "w") as f:
        json.dump(merged, fp=f)
    logger.info(
        f"Saved merged results to {instances_path_new} ({len(merged)} instances)"
    )
    return len(merged)
main
main(args)

Main function for looking up versions for task instances.

Source code in swebench/versioning/get_versions.py
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
def main(args):
    """
    Main function for looking up versions for task instances.

    Depending on args.retrieval_method ("github", "build", or "mix"),
    versions are retrieved by querying GitHub raw file URLs in parallel,
    by building the repo locally per worker thread, or by trying GitHub
    first and building only the instances that were not found ("mix").
    """
    # Get task instances + split into groups for each thread
    data_tasks = get_instances(args.instances_path)
    data_task_lists = split_instances(data_tasks, args.num_workers)
    # All instances are assumed to come from the same repo (index 0 is used)
    repo_prefix = data_tasks[0]["repo"].replace("/", "__")

    logger.info(
        f"Getting versions for {len(data_tasks)} instances for {data_tasks[0]['repo']}"
    )
    logger.info(
        f"Split instances into {len(data_task_lists)} groups with lengths {[len(x) for x in data_task_lists]}"
    )

    # If retrieval method includes GitHub, then search GitHub for versions via parallel call
    if any([x == args.retrieval_method for x in ["github", "mix"]]):
        # Manager-backed list so workers can report instances with no version found
        manager = Manager()
        shared_result_list = manager.list()
        pool = Pool(processes=args.num_workers)
        pool.map(
            get_versions_from_web,
            [
                {
                    "data_tasks": data_task_list,
                    # "mix" uses a distinct "_web" suffix so the later build
                    # pass does not collide with these intermediate files
                    "save_path": f"{repo_prefix}_versions_{i}.json"
                    if args.retrieval_method == "github"
                    else f"{repo_prefix}_versions_{i}_web.json",
                    "not_found_list": shared_result_list
                    if args.retrieval_method == "mix"
                    else None,
                }
                for i, data_task_list in enumerate(data_task_lists)
            ],
        )
        pool.close()
        pool.join()

        if args.retrieval_method == "github":
            # If retrieval method is just GitHub, then merge results and return
            assert len(data_tasks) == merge_results(
                args.instances_path, repo_prefix, args.output_dir
            )
            return
        elif args.retrieval_method == "mix":
            # Otherwise, remove instances that were found via GitHub from the list
            shared_result_list = list(shared_result_list)
            total_web = len(data_tasks) - len(shared_result_list)
            logger.info(f"Retrieved {total_web} versions from web")
            data_task_lists = split_instances(shared_result_list, args.num_workers)
            logger.info(
                f"Split instances into {len(data_task_lists)} groups with lengths {[len(x) for x in data_task_lists]} for build"
            )

    # Check that all required arguments for installing task instances are present
    # NOTE(review): "x in args" relies on argparse.Namespace.__contains__ — it
    # checks that the attribute was defined, not that its value is non-None
    assert any([x == args.retrieval_method for x in ["build", "mix"]])
    assert all([x in args for x in ["testbed", "path_conda", "conda_env"]])
    conda_exec = os.path.join(args.path_conda, "bin/conda")

    cwd = os.getcwd()
    os.chdir(args.testbed)
    for x in range(0, args.num_workers):
        # Clone git repo per thread
        testbed_repo_name = f"{repo_prefix}__{x}"
        if not os.path.exists(testbed_repo_name):
            logger.info(
                f"Creating clone of {data_tasks[0]['repo']} at {testbed_repo_name}"
            )
            # Clones from the swe-bench mirror org over SSH
            cmd_clone = (
                f"git clone git@github.com:swe-bench/{repo_prefix} {testbed_repo_name}"
            )
            subprocess.run(cmd_clone, shell=True, check=True, stdout=subprocess.DEVNULL)
        else:
            logger.info(
                f"Repo for {data_tasks[0]['repo']} exists: {testbed_repo_name}; skipping..."
            )
        # Clone conda environment per thread
        conda_env_name = f"{args.conda_env}_clone_{x}"
        if not os.path.exists(os.path.join(args.path_conda, "envs", conda_env_name)):
            logger.info(f"Creating clone of {args.conda_env} at {conda_env_name}")
            cmd_clone_env = f"{conda_exec} create --name {conda_env_name} --clone {args.conda_env} -y"
            subprocess.run(
                cmd_clone_env, shell=True, check=True, stdout=subprocess.DEVNULL
            )
        else:
            logger.info(
                f"Conda clone for thread {x} exists: {conda_env_name}; skipping..."
            )
    os.chdir(cwd)

    # Create pool tasks
    pool_tasks = []
    for i in range(0, args.num_workers):
        testbed_repo_name = f"{repo_prefix}__{i}"
        pool_tasks.append(
            {
                "data_tasks": data_task_lists[i],
                "path_repo": os.path.join(args.testbed, testbed_repo_name),
                "conda_env": f"{args.conda_env}_clone_{i}",
                "path_conda": args.path_conda,
                "save_path": os.path.join(cwd, f"{repo_prefix}_versions_{i}.json"),
            }
        )

    # Parallelized call
    pool = Pool(processes=args.num_workers)
    pool.map(get_versions_from_build, pool_tasks)
    pool.close()
    pool.join()

    # Check that correct number of instances were versioned
    if args.retrieval_method == "mix":
        # total_web was set in the GitHub branch above; "mix" always takes it
        assert (
            len(data_tasks)
            == merge_results(args.instances_path, repo_prefix, args.output_dir)
            + total_web
        )
    elif args.retrieval_method == "build":
        assert len(data_tasks) == merge_results(
            args.instances_path, repo_prefix, args.output_dir
        )

    # Remove testbed repo and conda environments
    if args.cleanup:
        cwd = os.getcwd()
        os.chdir(args.testbed)
        for x in range(0, args.num_workers):
            # Remove git repo
            testbed_repo_name = f"{repo_prefix}__{x}"
            subprocess.run(f"rm -rf {testbed_repo_name}", shell=True, check=True)

            # Remove conda environment
            cmd_rm_env = (
                f"{conda_exec} remove --name {args.conda_env}_clone_{x} --all -y"
            )
            subprocess.run(cmd_rm_env, shell=True, check=True)
        os.chdir(cwd)

utils

get_instances
get_instances(instance_path: str) -> list

Get task instances from given path

Parameters:

Name Type Description Default
instance_path str

Path to task instances

required

Returns: task_instances (list): List of task instances

Source code in swebench/versioning/utils.py
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
def get_instances(instance_path: str) -> list:
    """
    Get task instances from given path

    Supports both JSON (a single list) and JSON Lines (one JSON object per
    line) formats; the latter is detected by a ".jsonl" or ".jsonl.all"
    suffix on the path.

    Args:
        instance_path (str): Path to task instances
    Returns:
        task_instances (list): List of task instances
    """
    # str.endswith accepts a tuple of suffixes — no comprehension needed
    if instance_path.endswith((".jsonl", ".jsonl.all")):
        with open(instance_path) as f:
            # Iterate the file lazily instead of materializing all lines first
            return [json.loads(line) for line in f]

    with open(instance_path) as f:
        return json.load(f)
split_instances
split_instances(input_list: list, n: int) -> list

Split a list into n approximately equal length sublists

Parameters:

Name Type Description Default
input_list list

List to split

required
n int

Number of sublists to split into

required

Returns: result (list): List of sublists

Source code in swebench/versioning/utils.py
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
def split_instances(input_list: list, n: int) -> list:
    """
    Split a list into n approximately equal length sublists

    The first ``len(input_list) % n`` sublists each receive one extra
    element, so sublist sizes differ by at most one.

    Args:
        input_list (list): List to split
        n (int): Number of sublists to split into
    Returns:
        result (list): List of sublists
    """
    base_size, extras = divmod(len(input_list), n)
    chunks, offset = [], 0
    for idx in range(n):
        size = base_size + (1 if idx < extras else 0)
        chunks.append(input_list[offset : offset + size])
        offset += size
    return chunks