Harness API
swebench.harness
__all__
module-attribute
__all__ = ['docker_build', 'docker_utils', 'grading', 'prepare_images', 'remove_containers', 'reporting', 'run_evaluation', 'utils', 'constants', 'dockerfiles', 'log_parsers', 'modal_eval', 'test_spec']
constants
TEST_XVFB_PREFIX
module-attribute
TEST_XVFB_PREFIX = 'xvfb-run --server-args="-screen 0 1280x1024x24 -ac :99"'
XVFB_DEPS
module-attribute
XVFB_DEPS = ['python3', 'python3-pip', 'xvfb', 'x11-xkb-utils', 'xfonts-100dpi', 'xfonts-75dpi', 'xfonts-scalable', 'xfonts-cyrillic', 'x11-apps', 'firefox']
X11_DEPS
module-attribute
X11_DEPS = ['libx11-xcb1', 'libxcomposite1', 'libxcursor1', 'libxdamage1', 'libxi6', 'libxtst6', 'libnss3', 'libcups2', 'libxss1', 'libxrandr2', 'libasound2', 'libatk1.0-0', 'libgtk-3-0', 'x11-utils']
SPECS_CALYPSO
module-attribute
SPECS_CALYPSO = {**{k: {'apt-pkgs': ['libsass-dev', 'sassc'], 'install': ['npm install --unsafe-perm'], 'test_cmd': 'npm run test-client', 'docker_specs': {'node_version': k}} for k in ['0.8', '4.2.3', '4.3.0', '5.10.1', '5.11.1', '6.1.0', '6.7.0', '6.9.0', '6.9.1', '6.9.4', '6.10.0', '6.10.2', '6.10.3', '6.11.1', '6.11.2', '6.11.5', '8.9.1', '8.9.3', '8.9.4', '8.11.0', '8.11.2', '10.4.1', '10.5.0', '10.6.0', '10.9.0', '10.10.0', '10.12.0', '10.13.0', '10.14.0', '10.15.2', '10.16.3']}}
TEST_CHART_JS_TEMPLATE
module-attribute
TEST_CHART_JS_TEMPLATE = './node_modules/.bin/cross-env NODE_ENV=test ./node_modules/.bin/karma start {} --single-run --coverage --grep --auto-watch false'
SPECS_CHART_JS
module-attribute
SPECS_CHART_JS = {**{k: {'install': ['pnpm install', 'pnpm run build'], 'test_cmd': ['pnpm install', 'pnpm run build', f'{TEST_XVFB_PREFIX} su chromeuser -c "{TEST_CHART_JS_TEMPLATE.format("./karma.conf.cjs")}"'], 'docker_specs': {'node_version': '21.6.2', 'pnpm_version': '7.9.0', 'run_args': {'cap_add': ['SYS_ADMIN']}}} for k in ['4.0', '4.1', '4.2', '4.3', '4.4']}, **{k: {'install': ['npm install'], 'test_cmd': ['npm install', 'npm run build', f'{TEST_XVFB_PREFIX} su chromeuser -c "{TEST_CHART_JS_TEMPLATE.format("./karma.conf.js")}"'], 'docker_specs': {'node_version': '21.6.2', 'run_args': {'cap_add': ['SYS_ADMIN']}}} for k in ['3.0', '3.1', '3.2', '3.3', '3.4', '3.5', '3.6', '3.7', '3.8']}, **{k: {'install': ['npm install', 'npm install -g gulp-cli'], 'test_cmd': ['npm install', 'gulp build', TEST_XVFB_PREFIX + ' su chromeuser -c "gulp test"'], 'docker_specs': {'node_version': '21.6.2', 'run_args': {'cap_add': ['SYS_ADMIN']}}} for k in ['2.0', '2.1', '2.2', '2.3', '2.4', '2.5', '2.6', '2.7', '2.8', '2.9']}}
SPECS_MARKED
module-attribute
SPECS_MARKED = {**{k: {'install': ['npm install'], 'test_cmd': './node_modules/.bin/jasmine --no-color --config=jasmine.json', 'docker_specs': {'node_version': '12.22.12'}} for k in ['0.3', '0.5', '0.6', '0.7', '1.0', '1.1', '1.2', '2.0', '3.9', '4.0', '4.1', '5.0']}}
SPECS_P5_JS
module-attribute
SPECS_P5_JS = {**{k: {'apt-pkgs': X11_DEPS, 'install': ['npm install', "PUPPETEER_SKIP_CHROMIUM_DOWNLOAD='' node node_modules/puppeteer/install.js", './node_modules/.bin/grunt yui'], 'test_cmd': "sed -i 's/concurrency:[[:space:]]*[0-9][0-9]*/concurrency: 1/g' Gruntfile.js\nstdbuf -o 1M ./node_modules/.bin/grunt test --quiet --force", 'docker_specs': {'node_version': '14.17.3'}} for k in ['0.10', '0.2', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9', '1.0', '1.1', '1.2', '1.3', '1.4', '1.5', '1.6', '1.7', '1.8', '1.9']}}
SPECS_REACT_PDF
module-attribute
SPECS_REACT_PDF = {**{k: {'apt-pkgs': ['pkg-config', 'build-essential', 'libpixman-1-0', 'libpixman-1-dev', 'libcairo2-dev', 'libpango1.0-dev', 'libjpeg-dev', 'libgif-dev', 'librsvg2-dev'] + X11_DEPS, 'install': ['npm i -g yarn', 'yarn install'], 'test_cmd': 'NODE_OPTIONS="--experimental-vm-modules" ./node_modules/.bin/jest --no-color', 'docker_specs': {'node_version': '18.20.4'}} for k in ['1.0', '1.1', '1.2', '2.0']}}
MAP_REPO_VERSION_TO_SPECS_JS
module-attribute
MAP_REPO_VERSION_TO_SPECS_JS = {'Automattic/wp-calypso': SPECS_CALYPSO, 'chartjs/Chart.js': SPECS_CHART_JS, 'markedjs/marked': SPECS_MARKED, 'processing/p5.js': SPECS_P5_JS, 'diegomura/react-pdf': SPECS_REACT_PDF}
MAP_REPO_TO_INSTALL_JS
module-attribute
MAP_REPO_TO_INSTALL_JS = {}
TEST_PYTEST
module-attribute
TEST_PYTEST = 'pytest -rA'
TEST_PYTEST_VERBOSE
module-attribute
TEST_PYTEST_VERBOSE = 'pytest -rA --tb=long'
TEST_ASTROPY_PYTEST
module-attribute
TEST_ASTROPY_PYTEST = 'pytest -rA -vv -o console_output_style=classic --tb=no'
TEST_DJANGO
module-attribute
TEST_DJANGO = './tests/runtests.py --verbosity 2 --settings=test_sqlite --parallel 1'
TEST_DJANGO_NO_PARALLEL
module-attribute
TEST_DJANGO_NO_PARALLEL = './tests/runtests.py --verbosity 2'
TEST_SEABORN
module-attribute
TEST_SEABORN = 'pytest --no-header -rA'
TEST_SEABORN_VERBOSE
module-attribute
TEST_SEABORN_VERBOSE = 'pytest -rA --tb=long'
TEST_SPHINX
module-attribute
TEST_SPHINX = 'tox --current-env -epy39 -v --'
TEST_SYMPY
module-attribute
TEST_SYMPY = "PYTHONWARNINGS='ignore::UserWarning,ignore::SyntaxWarning' bin/test -C --verbose"
TEST_SYMPY_VERBOSE
module-attribute
TEST_SYMPY_VERBOSE = 'bin/test -C --verbose'
SPECS_SKLEARN
module-attribute
SPECS_SKLEARN = {k: {'python': '3.6', 'packages': 'numpy scipy cython pytest pandas matplotlib', 'install': 'python -m pip install -v --no-use-pep517 --no-build-isolation -e .', 'pip_packages': ['cython', 'numpy==1.19.2', 'setuptools', 'scipy==1.5.2'], 'test_cmd': TEST_PYTEST}for k in ['0.20', '0.21', '0.22']}
SPECS_FLASK
module-attribute
SPECS_FLASK = {'2.0': {'python': '3.9', 'packages': 'requirements.txt', 'install': 'python -m pip install -e .', 'pip_packages': ['setuptools==70.0.0', 'Werkzeug==2.3.7', 'Jinja2==3.0.1', 'itsdangerous==2.1.2', 'click==8.0.1', 'MarkupSafe==2.1.3'], 'test_cmd': TEST_PYTEST}, '2.1': {'python': '3.10', 'packages': 'requirements.txt', 'install': 'python -m pip install -e .', 'pip_packages': ['setuptools==70.0.0', 'click==8.1.3', 'itsdangerous==2.1.2', 'Jinja2==3.1.2', 'MarkupSafe==2.1.1', 'Werkzeug==2.3.7'], 'test_cmd': TEST_PYTEST}}
SPECS_DJANGO
module-attribute
SPECS_DJANGO = {k: {'python': '3.5', 'packages': 'requirements.txt', 'pre_install': ['apt-get update && apt-get install -y locales', "echo 'en_US UTF-8' > /etc/locale.gen", 'locale-gen en_US.UTF-8'], 'install': 'python setup.py install', 'pip_packages': ['setuptools'], 'eval_commands': ['export LANG=en_US.UTF-8', 'export LC_ALL=en_US.UTF-8', 'export PYTHONIOENCODING=utf8', 'export LANGUAGE=en_US:en'], 'test_cmd': TEST_DJANGO}for k in ['1.7', '1.8', '1.9', '1.10', '1.11', '2.0', '2.1', '2.2']}
SPECS_REQUESTS
module-attribute
SPECS_REQUESTS = {k: {'python': '3.9', 'packages': 'pytest', 'install': 'python -m pip install .', 'test_cmd': TEST_PYTEST}for k in ['0.7', '0.8', '0.9', '0.11', '0.13', '0.14', '1.1', '1.2', '2.0', '2.2'] + ['2.3', '2.4', '2.5', '2.7', '2.8', '2.9', '2.10', '2.11', '2.12', '2.17'] + ['2.18', '2.19', '2.22', '2.26', '2.25', '2.27', '2.31', '3.0']}
SPECS_SEABORN
module-attribute
SPECS_SEABORN = {k: {'python': '3.9', 'install': 'python -m pip install -e .', 'pip_packages': ['contourpy==1.1.0', 'cycler==0.11.0', 'fonttools==4.42.1', 'importlib-resources==6.0.1', 'kiwisolver==1.4.5', 'matplotlib==3.7.2', 'numpy==1.25.2', 'packaging==23.1', 'pandas==1.3.5', 'pillow==10.0.0', 'pyparsing==3.0.9', 'pytest', 'python-dateutil==2.8.2', 'pytz==2023.3.post1', 'scipy==1.11.2', 'six==1.16.0', 'tzdata==2023.1', 'zipp==3.16.2'], 'test_cmd': TEST_SEABORN}for k in ['0.11']}
SPECS_PYTEST
module-attribute
SPECS_PYTEST = {k: {'python': '3.9', 'install': 'python -m pip install -e .', 'test_cmd': TEST_PYTEST}for k in ['4.4', '4.5', '4.6', '5.0', '5.1', '5.2', '5.3', '5.4', '6.0', '6.2', '6.3', '7.0', '7.1', '7.2', '7.4', '8.0', '8.1', '8.2', '8.3', '8.4']}
SPECS_MATPLOTLIB
module-attribute
SPECS_MATPLOTLIB = {k: {'python': '3.11', 'packages': 'environment.yml', 'install': 'python -m pip install -e .', 'pre_install': ['apt-get -y update && apt-get -y upgrade && DEBIAN_FRONTEND=noninteractive apt-get install -y imagemagick ffmpeg texlive texlive-latex-extra texlive-fonts-recommended texlive-xetex texlive-luatex cm-super dvipng', 'QHULL_URL="http://www.qhull.org/download/qhull-2020-src-8.0.2.tgz"', 'QHULL_TAR="/tmp/qhull-2020-src-8.0.2.tgz"', 'QHULL_BUILD_DIR="/testbed/build"', 'wget -O "$QHULL_TAR" "$QHULL_URL"', 'mkdir -p "$QHULL_BUILD_DIR"', 'tar -xvzf "$QHULL_TAR" -C "$QHULL_BUILD_DIR"'], 'pip_packages': ['contourpy==1.1.0', 'cycler==0.11.0', 'fonttools==4.42.1', 'ghostscript', 'kiwisolver==1.4.5', 'numpy==1.25.2', 'packaging==23.1', 'pillow==10.0.0', 'pikepdf', 'pyparsing==3.0.9', 'python-dateutil==2.8.2', 'six==1.16.0', 'setuptools==68.1.2', 'setuptools-scm==7.1.0', 'typing-extensions==4.7.1'], 'test_cmd': TEST_PYTEST}for k in ['3.5', '3.6', '3.7', '3.8', '3.9']}
SPECS_SPHINX
module-attribute
SPECS_SPHINX = {k: {'python': '3.9', 'pip_packages': ['tox==4.16.0', 'tox-current-env==0.0.11', 'Jinja2==3.0.3'], 'install': 'python -m pip install -e .[test]', 'pre_install': ["sed -i 's/pytest/pytest -rA/' tox.ini"], 'test_cmd': TEST_SPHINX}for k in ['1.5', '1.6', '1.7', '1.8', '2.0', '2.1', '2.2', '2.3', '2.4', '3.0'] + ['3.1', '3.2', '3.3', '3.4', '3.5', '4.0', '4.1', '4.2', '4.3', '4.4'] + ['4.5', '5.0', '5.1', '5.2', '5.3', '6.0', '6.2', '7.0', '7.1', '7.2'] + ['7.3', '7.4', '8.0', '8.1']}
SPECS_ASTROPY
module-attribute
SPECS_ASTROPY = {k: {'python': '3.9', 'install': 'python -m pip install -e .[test] --verbose', 'pip_packages': ['attrs==23.1.0', 'exceptiongroup==1.1.3', 'execnet==2.0.2', 'hypothesis==6.82.6', 'iniconfig==2.0.0', 'numpy==1.25.2', 'packaging==23.1', 'pluggy==1.3.0', 'psutil==5.9.5', 'pyerfa==2.0.0.3', 'pytest-arraydiff==0.5.0', 'pytest-astropy-header==0.2.2', 'pytest-astropy==0.10.0', 'pytest-cov==4.1.0', 'pytest-doctestplus==1.0.0', 'pytest-filter-subpackage==0.1.2', 'pytest-mock==3.11.1', 'pytest-openfiles==0.5.0', 'pytest-remotedata==0.4.0', 'pytest-xdist==3.3.1', 'pytest==7.4.0', 'PyYAML==6.0.1', 'setuptools==68.0.0', 'sortedcontainers==2.4.0', 'tomli==2.0.1'], 'test_cmd': TEST_PYTEST}for k in ['3.0', '3.1', '3.2', '4.1', '4.2', '4.3', '5.0', '5.1', '5.2', 'v5.3']}
SPECS_SYMPY
module-attribute
SPECS_SYMPY = {k: {'python': '3.9', 'packages': 'mpmath flake8', 'pip_packages': ['mpmath==1.3.0', 'flake8-comprehensions'], 'install': 'python -m pip install -e .', 'test_cmd': TEST_SYMPY}for k in ['0.7', '1.0', '1.1', '1.10', '1.11', '1.12', '1.2', '1.4', '1.5', '1.6'] + ['1.7', '1.8', '1.9'] + ['1.10', '1.11', '1.12', '1.13', '1.14']}
SPECS_PYLINT
module-attribute
SPECS_PYLINT = {k: {'python': '3.9', 'packages': 'requirements.txt', 'install': 'python -m pip install -e .', 'test_cmd': TEST_PYTEST}for k in ['2.10', '2.11', '2.13', '2.14', '2.15', '2.16', '2.17', '2.8', '2.9', '3.0', '3.1', '3.2', '3.3', '4.0']}
SPECS_XARRAY
module-attribute
SPECS_XARRAY = {k: {'python': '3.10', 'packages': 'environment.yml', 'install': 'python -m pip install -e .', 'pip_packages': ['numpy==1.23.0', 'packaging==23.1', 'pandas==1.5.3', 'pytest==7.4.0', 'python-dateutil==2.8.2', 'pytz==2023.3', 'six==1.16.0', 'scipy==1.11.1', 'setuptools==68.0.0', 'dask==2022.8.1'], 'no_use_env': True, 'test_cmd': TEST_PYTEST}for k in ['0.12', '0.18', '0.19', '0.20', '2022.03', '2022.06', '2022.09', '2023.07', '2024.05']}
SPECS_SQLFLUFF
module-attribute
SPECS_SQLFLUFF = {k: {'python': '3.9', 'packages': 'requirements.txt', 'install': 'python -m pip install -e .', 'test_cmd': TEST_PYTEST}for k in ['0.10', '0.11', '0.12', '0.13', '0.4', '0.5', '0.6', '0.8', '0.9', '1.0', '1.1', '1.2', '1.3', '1.4', '2.0', '2.1', '2.2']}
SPECS_DBT_CORE
module-attribute
SPECS_DBT_CORE = {k: {'python': '3.9', 'packages': 'requirements.txt', 'install': 'python -m pip install -e .'}for k in ['0.13', '0.14', '0.15', '0.16', '0.17', '0.18', '0.19', '0.20', '0.21', '1.0', '1.1', '1.2', '1.3', '1.4', '1.5', '1.6', '1.7']}
SPECS_PYVISTA
module-attribute
SPECS_PYVISTA = {k: {'python': '3.9', 'install': 'python -m pip install -e .', 'pip_packages': ['pytest'], 'test_cmd': TEST_PYTEST}for k in ['0.20', '0.21', '0.22', '0.23']}
SPECS_ASTROID
module-attribute
SPECS_ASTROID = {k: {'python': '3.9', 'install': 'python -m pip install -e .', 'pip_packages': ['pytest'], 'test_cmd': TEST_PYTEST}for k in ['2.10', '2.12', '2.13', '2.14', '2.15', '2.16', '2.5', '2.6', '2.7', '2.8', '2.9', '3.0']}
SPECS_MARSHMALLOW
module-attribute
SPECS_MARSHMALLOW = {k: {'python': '3.9', 'install': "python -m pip install -e '.[dev]'", 'test_cmd': TEST_PYTEST}for k in ['2.18', '2.19', '2.20', '3.0', '3.1', '3.10', '3.11', '3.12', '3.13', '3.15', '3.16', '3.19', '3.2', '3.4', '3.8', '3.9']}
SPECS_PVLIB
module-attribute
SPECS_PVLIB = {k: {'python': '3.9', 'install': 'python -m pip install -e .[all]', 'packages': 'pandas scipy', 'pip_packages': ['jupyter', 'ipython', 'matplotlib', 'pytest', 'flake8'], 'test_cmd': TEST_PYTEST}for k in ['0.1', '0.2', '0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9']}
SPECS_PYDICOM
module-attribute
SPECS_PYDICOM = {k: {'python': '3.6', 'install': 'python -m pip install -e .', 'packages': 'numpy', 'pip_packages': ['pytest'], 'test_cmd': TEST_PYTEST}for k in ['1.0', '1.1', '1.2', '1.3', '1.4', '2.0', '2.1', '2.2', '2.3', '2.4', '3.0']}
SPECS_HUMANEVAL
module-attribute
SPECS_HUMANEVAL = {k: {'python': '3.9', 'test_cmd': 'python'}for k in ['1.0']}
MAP_REPO_VERSION_TO_SPECS_PY
module-attribute
MAP_REPO_VERSION_TO_SPECS_PY = {'astropy/astropy': SPECS_ASTROPY, 'dbt-labs/dbt-core': SPECS_DBT_CORE, 'django/django': SPECS_DJANGO, 'matplotlib/matplotlib': SPECS_MATPLOTLIB, 'marshmallow-code/marshmallow': SPECS_MARSHMALLOW, 'mwaskom/seaborn': SPECS_SEABORN, 'pallets/flask': SPECS_FLASK, 'psf/requests': SPECS_REQUESTS, 'pvlib/pvlib-python': SPECS_PVLIB, 'pydata/xarray': SPECS_XARRAY, 'pydicom/pydicom': SPECS_PYDICOM, 'pylint-dev/astroid': SPECS_ASTROID, 'pylint-dev/pylint': SPECS_PYLINT, 'pytest-dev/pytest': SPECS_PYTEST, 'pyvista/pyvista': SPECS_PYVISTA, 'scikit-learn/scikit-learn': SPECS_SKLEARN, 'sphinx-doc/sphinx': SPECS_SPHINX, 'sqlfluff/sqlfluff': SPECS_SQLFLUFF, 'swe-bench/humaneval': SPECS_HUMANEVAL, 'sympy/sympy': SPECS_SYMPY}
MAP_REPO_TO_INSTALL_PY
module-attribute
MAP_REPO_TO_INSTALL_PY = {}
MAP_REPO_TO_REQS_PATHS
module-attribute
MAP_REPO_TO_REQS_PATHS = {'dbt-labs/dbt-core': ['dev-requirements.txt', 'dev_requirements.txt'], 'django/django': ['tests/requirements/py3.txt'], 'matplotlib/matplotlib': ['requirements/dev/dev-requirements.txt', 'requirements/testing/travis_all.txt'], 'pallets/flask': ['requirements/dev.txt'], 'pylint-dev/pylint': ['requirements_test.txt'], 'pyvista/pyvista': ['requirements_test.txt', 'requirements.txt'], 'sqlfluff/sqlfluff': ['requirements_dev.txt'], 'sympy/sympy': ['requirements-dev.txt', 'requirements-test.txt']}
MAP_REPO_TO_ENV_YML_PATHS
module-attribute
MAP_REPO_TO_ENV_YML_PATHS = {'matplotlib/matplotlib': ['environment.yml'], 'pydata/xarray': ['ci/requirements/environment.yml', 'environment.yml']}
USE_X86_PY
module-attribute
USE_X86_PY = {'astropy__astropy-7973', 'django__django-10087', 'django__django-10097', 'django__django-10213', 'django__django-10301', 'django__django-10316', 'django__django-10426', 'django__django-11383', 'django__django-12185', 'django__django-12497', 'django__django-13121', 'django__django-13417', 'django__django-13431', 'django__django-13447', 'django__django-14155', 'django__django-14164', 'django__django-14169', 'django__django-14170', 'django__django-15180', 'django__django-15199', 'django__django-15280', 'django__django-15292', 'django__django-15474', 'django__django-15682', 'django__django-15689', 'django__django-15695', 'django__django-15698', 'django__django-15781', 'django__django-15925', 'django__django-15930', 'django__django-5158', 'django__django-5470', 'django__django-7188', 'django__django-7475', 'django__django-7530', 'django__django-8326', 'django__django-8961', 'django__django-9003', 'django__django-9703', 'django__django-9871', 'matplotlib__matplotlib-13983', 'matplotlib__matplotlib-13984', 'matplotlib__matplotlib-13989', 'matplotlib__matplotlib-14043', 'matplotlib__matplotlib-14471', 'matplotlib__matplotlib-22711', 'matplotlib__matplotlib-22719', 'matplotlib__matplotlib-22734', 'matplotlib__matplotlib-22767', 'matplotlib__matplotlib-22815', 'matplotlib__matplotlib-22835', 'matplotlib__matplotlib-22865', 'matplotlib__matplotlib-22871', 'matplotlib__matplotlib-22883', 'matplotlib__matplotlib-22926', 'matplotlib__matplotlib-22929', 'matplotlib__matplotlib-22931', 'matplotlib__matplotlib-22945', 'matplotlib__matplotlib-22991', 'matplotlib__matplotlib-23031', 'matplotlib__matplotlib-23047', 'matplotlib__matplotlib-23049', 'matplotlib__matplotlib-23057', 'matplotlib__matplotlib-23088', 'matplotlib__matplotlib-23111', 'matplotlib__matplotlib-23140', 'matplotlib__matplotlib-23174', 'matplotlib__matplotlib-23188', 'matplotlib__matplotlib-23198', 'matplotlib__matplotlib-23203', 'matplotlib__matplotlib-23266', 'matplotlib__matplotlib-23267', 
'matplotlib__matplotlib-23288', 'matplotlib__matplotlib-23299', 'matplotlib__matplotlib-23314', 'matplotlib__matplotlib-23332', 'matplotlib__matplotlib-23348', 'matplotlib__matplotlib-23412', 'matplotlib__matplotlib-23476', 'matplotlib__matplotlib-23516', 'matplotlib__matplotlib-23562', 'matplotlib__matplotlib-23563', 'matplotlib__matplotlib-23573', 'matplotlib__matplotlib-23740', 'matplotlib__matplotlib-23742', 'matplotlib__matplotlib-23913', 'matplotlib__matplotlib-23964', 'matplotlib__matplotlib-23987', 'matplotlib__matplotlib-24013', 'matplotlib__matplotlib-24026', 'matplotlib__matplotlib-24088', 'matplotlib__matplotlib-24111', 'matplotlib__matplotlib-24149', 'matplotlib__matplotlib-24177', 'matplotlib__matplotlib-24189', 'matplotlib__matplotlib-24224', 'matplotlib__matplotlib-24250', 'matplotlib__matplotlib-24257', 'matplotlib__matplotlib-24265', 'matplotlib__matplotlib-24334', 'matplotlib__matplotlib-24362', 'matplotlib__matplotlib-24403', 'matplotlib__matplotlib-24431', 'matplotlib__matplotlib-24538', 'matplotlib__matplotlib-24570', 'matplotlib__matplotlib-24604', 'matplotlib__matplotlib-24619', 'matplotlib__matplotlib-24627', 'matplotlib__matplotlib-24637', 'matplotlib__matplotlib-24691', 'matplotlib__matplotlib-24749', 'matplotlib__matplotlib-24768', 'matplotlib__matplotlib-24849', 'matplotlib__matplotlib-24870', 'matplotlib__matplotlib-24912', 'matplotlib__matplotlib-24924', 'matplotlib__matplotlib-24970', 'matplotlib__matplotlib-24971', 'matplotlib__matplotlib-25027', 'matplotlib__matplotlib-25052', 'matplotlib__matplotlib-25079', 'matplotlib__matplotlib-25085', 'matplotlib__matplotlib-25122', 'matplotlib__matplotlib-25126', 'matplotlib__matplotlib-25129', 'matplotlib__matplotlib-25238', 'matplotlib__matplotlib-25281', 'matplotlib__matplotlib-25287', 'matplotlib__matplotlib-25311', 'matplotlib__matplotlib-25332', 'matplotlib__matplotlib-25334', 'matplotlib__matplotlib-25340', 'matplotlib__matplotlib-25346', 'matplotlib__matplotlib-25404', 
'matplotlib__matplotlib-25405', 'matplotlib__matplotlib-25425', 'matplotlib__matplotlib-25430', 'matplotlib__matplotlib-25433', 'matplotlib__matplotlib-25442', 'matplotlib__matplotlib-25479', 'matplotlib__matplotlib-25498', 'matplotlib__matplotlib-25499', 'matplotlib__matplotlib-25515', 'matplotlib__matplotlib-25547', 'matplotlib__matplotlib-25551', 'matplotlib__matplotlib-25565', 'matplotlib__matplotlib-25624', 'matplotlib__matplotlib-25631', 'matplotlib__matplotlib-25640', 'matplotlib__matplotlib-25651', 'matplotlib__matplotlib-25667', 'matplotlib__matplotlib-25712', 'matplotlib__matplotlib-25746', 'matplotlib__matplotlib-25772', 'matplotlib__matplotlib-25775', 'matplotlib__matplotlib-25779', 'matplotlib__matplotlib-25785', 'matplotlib__matplotlib-25794', 'matplotlib__matplotlib-25859', 'matplotlib__matplotlib-25960', 'matplotlib__matplotlib-26011', 'matplotlib__matplotlib-26020', 'matplotlib__matplotlib-26024', 'matplotlib__matplotlib-26078', 'matplotlib__matplotlib-26089', 'matplotlib__matplotlib-26101', 'matplotlib__matplotlib-26113', 'matplotlib__matplotlib-26122', 'matplotlib__matplotlib-26160', 'matplotlib__matplotlib-26184', 'matplotlib__matplotlib-26208', 'matplotlib__matplotlib-26223', 'matplotlib__matplotlib-26232', 'matplotlib__matplotlib-26249', 'matplotlib__matplotlib-26278', 'matplotlib__matplotlib-26285', 'matplotlib__matplotlib-26291', 'matplotlib__matplotlib-26300', 'matplotlib__matplotlib-26311', 'matplotlib__matplotlib-26341', 'matplotlib__matplotlib-26342', 'matplotlib__matplotlib-26399', 'matplotlib__matplotlib-26466', 'matplotlib__matplotlib-26469', 'matplotlib__matplotlib-26472', 'matplotlib__matplotlib-26479', 'matplotlib__matplotlib-26532', 'pydata__xarray-2905', 'pydata__xarray-2922', 'pydata__xarray-3095', 'pydata__xarray-3114', 'pydata__xarray-3151', 'pydata__xarray-3156', 'pydata__xarray-3159', 'pydata__xarray-3239', 'pydata__xarray-3302', 'pydata__xarray-3305', 'pydata__xarray-3338', 'pydata__xarray-3364', 'pydata__xarray-3406', 
'pydata__xarray-3520', 'pydata__xarray-3527', 'pydata__xarray-3631', 'pydata__xarray-3635', 'pydata__xarray-3637', 'pydata__xarray-3649', 'pydata__xarray-3677', 'pydata__xarray-3733', 'pydata__xarray-3812', 'pydata__xarray-3905', 'pydata__xarray-3976', 'pydata__xarray-3979', 'pydata__xarray-3993', 'pydata__xarray-4075', 'pydata__xarray-4094', 'pydata__xarray-4098', 'pydata__xarray-4182', 'pydata__xarray-4184', 'pydata__xarray-4248', 'pydata__xarray-4339', 'pydata__xarray-4356', 'pydata__xarray-4419', 'pydata__xarray-4423', 'pydata__xarray-4442', 'pydata__xarray-4493', 'pydata__xarray-4510', 'pydata__xarray-4629', 'pydata__xarray-4683', 'pydata__xarray-4684', 'pydata__xarray-4687', 'pydata__xarray-4695', 'pydata__xarray-4750', 'pydata__xarray-4758', 'pydata__xarray-4759', 'pydata__xarray-4767', 'pydata__xarray-4802', 'pydata__xarray-4819', 'pydata__xarray-4827', 'pydata__xarray-4879', 'pydata__xarray-4911', 'pydata__xarray-4939', 'pydata__xarray-4940', 'pydata__xarray-4966', 'pydata__xarray-4994', 'pydata__xarray-5033', 'pydata__xarray-5126', 'pydata__xarray-5131', 'pydata__xarray-5180', 'pydata__xarray-5187', 'pydata__xarray-5233', 'pydata__xarray-5362', 'pydata__xarray-5365', 'pydata__xarray-5455', 'pydata__xarray-5580', 'pydata__xarray-5662', 'pydata__xarray-5682', 'pydata__xarray-5731', 'pydata__xarray-6135', 'pydata__xarray-6386', 'pydata__xarray-6394', 'pydata__xarray-6400', 'pydata__xarray-6461', 'pydata__xarray-6548', 'pydata__xarray-6598', 'pydata__xarray-6599', 'pydata__xarray-6601', 'pydata__xarray-6721', 'pydata__xarray-6744', 'pydata__xarray-6798', 'pydata__xarray-6804', 'pydata__xarray-6823', 'pydata__xarray-6857', 'pydata__xarray-6882', 'pydata__xarray-6889', 'pydata__xarray-6938', 'pydata__xarray-6971', 'pydata__xarray-6992', 'pydata__xarray-6999', 'pydata__xarray-7003', 'pydata__xarray-7019', 'pydata__xarray-7052', 'pydata__xarray-7089', 'pydata__xarray-7101', 'pydata__xarray-7105', 'pydata__xarray-7112', 'pydata__xarray-7120', 
'pydata__xarray-7147', 'pydata__xarray-7150', 'pydata__xarray-7179', 'pydata__xarray-7203', 'pydata__xarray-7229', 'pydata__xarray-7233', 'pydata__xarray-7347', 'pydata__xarray-7391', 'pydata__xarray-7393', 'pydata__xarray-7400', 'pydata__xarray-7444', 'pytest-dev__pytest-10482', 'scikit-learn__scikit-learn-10198', 'scikit-learn__scikit-learn-10297', 'scikit-learn__scikit-learn-10306', 'scikit-learn__scikit-learn-10331', 'scikit-learn__scikit-learn-10377', 'scikit-learn__scikit-learn-10382', 'scikit-learn__scikit-learn-10397', 'scikit-learn__scikit-learn-10427', 'scikit-learn__scikit-learn-10428', 'scikit-learn__scikit-learn-10443', 'scikit-learn__scikit-learn-10452', 'scikit-learn__scikit-learn-10459', 'scikit-learn__scikit-learn-10471', 'scikit-learn__scikit-learn-10483', 'scikit-learn__scikit-learn-10495', 'scikit-learn__scikit-learn-10508', 'scikit-learn__scikit-learn-10558', 'scikit-learn__scikit-learn-10577', 'scikit-learn__scikit-learn-10581', 'scikit-learn__scikit-learn-10687', 'scikit-learn__scikit-learn-10774', 'scikit-learn__scikit-learn-10777', 'scikit-learn__scikit-learn-10803', 'scikit-learn__scikit-learn-10844', 'scikit-learn__scikit-learn-10870', 'scikit-learn__scikit-learn-10881', 'scikit-learn__scikit-learn-10899', 'scikit-learn__scikit-learn-10908', 'scikit-learn__scikit-learn-10913', 'scikit-learn__scikit-learn-10949', 'scikit-learn__scikit-learn-10982', 'scikit-learn__scikit-learn-10986', 'scikit-learn__scikit-learn-11040', 'scikit-learn__scikit-learn-11042', 'scikit-learn__scikit-learn-11043', 'scikit-learn__scikit-learn-11151', 'scikit-learn__scikit-learn-11160', 'scikit-learn__scikit-learn-11206', 'scikit-learn__scikit-learn-11235', 'scikit-learn__scikit-learn-11243', 'scikit-learn__scikit-learn-11264', 'scikit-learn__scikit-learn-11281', 'scikit-learn__scikit-learn-11310', 'scikit-learn__scikit-learn-11315', 'scikit-learn__scikit-learn-11333', 'scikit-learn__scikit-learn-11346', 'scikit-learn__scikit-learn-11391', 
'scikit-learn__scikit-learn-11496', 'scikit-learn__scikit-learn-11542', 'scikit-learn__scikit-learn-11574', 'scikit-learn__scikit-learn-11578', 'scikit-learn__scikit-learn-11585', 'scikit-learn__scikit-learn-11596', 'scikit-learn__scikit-learn-11635', 'scikit-learn__scikit-learn-12258', 'scikit-learn__scikit-learn-12421', 'scikit-learn__scikit-learn-12443', 'scikit-learn__scikit-learn-12462', 'scikit-learn__scikit-learn-12471', 'scikit-learn__scikit-learn-12486', 'scikit-learn__scikit-learn-12557', 'scikit-learn__scikit-learn-12583', 'scikit-learn__scikit-learn-12585', 'scikit-learn__scikit-learn-12625', 'scikit-learn__scikit-learn-12626', 'scikit-learn__scikit-learn-12656', 'scikit-learn__scikit-learn-12682', 'scikit-learn__scikit-learn-12704', 'scikit-learn__scikit-learn-12733', 'scikit-learn__scikit-learn-12758', 'scikit-learn__scikit-learn-12760', 'scikit-learn__scikit-learn-12784', 'scikit-learn__scikit-learn-12827', 'scikit-learn__scikit-learn-12834', 'scikit-learn__scikit-learn-12860', 'scikit-learn__scikit-learn-12908', 'scikit-learn__scikit-learn-12938', 'scikit-learn__scikit-learn-12961', 'scikit-learn__scikit-learn-12973', 'scikit-learn__scikit-learn-12983', 'scikit-learn__scikit-learn-12989', 'scikit-learn__scikit-learn-13010', 'scikit-learn__scikit-learn-13013', 'scikit-learn__scikit-learn-13017', 'scikit-learn__scikit-learn-13046', 'scikit-learn__scikit-learn-13087', 'scikit-learn__scikit-learn-13124', 'scikit-learn__scikit-learn-13135', 'scikit-learn__scikit-learn-13142', 'scikit-learn__scikit-learn-13143', 'scikit-learn__scikit-learn-13157', 'scikit-learn__scikit-learn-13165', 'scikit-learn__scikit-learn-13174', 'scikit-learn__scikit-learn-13221', 'scikit-learn__scikit-learn-13241', 'scikit-learn__scikit-learn-13253', 'scikit-learn__scikit-learn-13280', 'scikit-learn__scikit-learn-13283', 'scikit-learn__scikit-learn-13302', 'scikit-learn__scikit-learn-13313', 'scikit-learn__scikit-learn-13328', 'scikit-learn__scikit-learn-13333', 
'scikit-learn__scikit-learn-13363', 'scikit-learn__scikit-learn-13368', 'scikit-learn__scikit-learn-13392', 'scikit-learn__scikit-learn-13436', 'scikit-learn__scikit-learn-13439', 'scikit-learn__scikit-learn-13447', 'scikit-learn__scikit-learn-13454', 'scikit-learn__scikit-learn-13467', 'scikit-learn__scikit-learn-13472', 'scikit-learn__scikit-learn-13485', 'scikit-learn__scikit-learn-13496', 'scikit-learn__scikit-learn-13497', 'scikit-learn__scikit-learn-13536', 'scikit-learn__scikit-learn-13549', 'scikit-learn__scikit-learn-13554', 'scikit-learn__scikit-learn-13584', 'scikit-learn__scikit-learn-13618', 'scikit-learn__scikit-learn-13620', 'scikit-learn__scikit-learn-13628', 'scikit-learn__scikit-learn-13641', 'scikit-learn__scikit-learn-13704', 'scikit-learn__scikit-learn-13726', 'scikit-learn__scikit-learn-13779', 'scikit-learn__scikit-learn-13780', 'scikit-learn__scikit-learn-13828', 'scikit-learn__scikit-learn-13864', 'scikit-learn__scikit-learn-13877', 'scikit-learn__scikit-learn-13910', 'scikit-learn__scikit-learn-13915', 'scikit-learn__scikit-learn-13933', 'scikit-learn__scikit-learn-13960', 'scikit-learn__scikit-learn-13974', 'scikit-learn__scikit-learn-13983', 'scikit-learn__scikit-learn-14012', 'scikit-learn__scikit-learn-14024', 'scikit-learn__scikit-learn-14053', 'scikit-learn__scikit-learn-14067', 'scikit-learn__scikit-learn-14087', 'scikit-learn__scikit-learn-14092', 'scikit-learn__scikit-learn-14114', 'scikit-learn__scikit-learn-14125', 'scikit-learn__scikit-learn-14141', 'scikit-learn__scikit-learn-14237', 'scikit-learn__scikit-learn-14309', 'scikit-learn__scikit-learn-14430', 'scikit-learn__scikit-learn-14450', 'scikit-learn__scikit-learn-14458', 'scikit-learn__scikit-learn-14464', 'scikit-learn__scikit-learn-14496', 'scikit-learn__scikit-learn-14520', 'scikit-learn__scikit-learn-14544', 'scikit-learn__scikit-learn-14591', 'scikit-learn__scikit-learn-14629', 'scikit-learn__scikit-learn-14704', 'scikit-learn__scikit-learn-14706', 
'scikit-learn__scikit-learn-14710', 'scikit-learn__scikit-learn-14732', 'scikit-learn__scikit-learn-14764', 'scikit-learn__scikit-learn-14806', 'scikit-learn__scikit-learn-14869', 'scikit-learn__scikit-learn-14878', 'scikit-learn__scikit-learn-14890', 'scikit-learn__scikit-learn-14894', 'scikit-learn__scikit-learn-14898', 'scikit-learn__scikit-learn-14908', 'scikit-learn__scikit-learn-14983', 'scikit-learn__scikit-learn-14999', 'scikit-learn__scikit-learn-15028', 'scikit-learn__scikit-learn-15084', 'scikit-learn__scikit-learn-15086', 'scikit-learn__scikit-learn-15094', 'scikit-learn__scikit-learn-15096', 'scikit-learn__scikit-learn-15100', 'scikit-learn__scikit-learn-15119', 'scikit-learn__scikit-learn-15120', 'scikit-learn__scikit-learn-15138', 'scikit-learn__scikit-learn-15393', 'scikit-learn__scikit-learn-15495', 'scikit-learn__scikit-learn-15512', 'scikit-learn__scikit-learn-15524', 'scikit-learn__scikit-learn-15535', 'scikit-learn__scikit-learn-15625', 'scikit-learn__scikit-learn-3840', 'scikit-learn__scikit-learn-7760', 'scikit-learn__scikit-learn-8554', 'scikit-learn__scikit-learn-9274', 'scikit-learn__scikit-learn-9288', 'scikit-learn__scikit-learn-9304', 'scikit-learn__scikit-learn-9775', 'scikit-learn__scikit-learn-9939', 'sphinx-doc__sphinx-11311', 'sphinx-doc__sphinx-7910', 'sympy__sympy-12812', 'sympy__sympy-14248', 'sympy__sympy-15222', 'sympy__sympy-19201'}
BASE_IMAGE_BUILD_DIR
module-attribute
BASE_IMAGE_BUILD_DIR = Path('logs/build_images/base')
ENV_IMAGE_BUILD_DIR
module-attribute
ENV_IMAGE_BUILD_DIR = Path('logs/build_images/env')
INSTANCE_IMAGE_BUILD_DIR
module-attribute
INSTANCE_IMAGE_BUILD_DIR = Path('logs/build_images/instances')
RUN_EVALUATION_LOG_DIR
module-attribute
RUN_EVALUATION_LOG_DIR = Path('logs/run_evaluation')
RUN_VALIDATION_LOG_DIR
module-attribute
RUN_VALIDATION_LOG_DIR = Path('logs/run_validation')
FAIL_TO_PASS
module-attribute
FAIL_TO_PASS = 'FAIL_TO_PASS'
FAIL_TO_FAIL
module-attribute
FAIL_TO_FAIL = 'FAIL_TO_FAIL'
PASS_TO_PASS
module-attribute
PASS_TO_PASS = 'PASS_TO_PASS'
PASS_TO_FAIL
module-attribute
PASS_TO_FAIL = 'PASS_TO_FAIL'
KEY_INSTANCE_ID
module-attribute
KEY_INSTANCE_ID = 'instance_id'
KEY_MODEL
module-attribute
KEY_MODEL = 'model_name_or_path'
KEY_PREDICTION
module-attribute
KEY_PREDICTION = 'model_patch'
DOCKER_PATCH
module-attribute
DOCKER_PATCH = '/tmp/patch.diff'
DOCKER_USER
module-attribute
DOCKER_USER = 'root'
DOCKER_WORKDIR
module-attribute
DOCKER_WORKDIR = '/testbed'
LOG_REPORT
module-attribute
LOG_REPORT = 'report.json'
LOG_INSTANCE
module-attribute
LOG_INSTANCE = 'run_instance.log'
LOG_TEST_OUTPUT
module-attribute
LOG_TEST_OUTPUT = 'test_output.txt'
UTF8
module-attribute
UTF8 = 'utf-8'
APPLY_PATCH_FAIL
module-attribute
APPLY_PATCH_FAIL = '>>>>> Patch Apply Failed'
APPLY_PATCH_PASS
module-attribute
APPLY_PATCH_PASS = '>>>>> Applied Patch'
INSTALL_FAIL
module-attribute
INSTALL_FAIL = '>>>>> Init Failed'
INSTALL_PASS
module-attribute
INSTALL_PASS = '>>>>> Init Succeeded'
INSTALL_TIMEOUT
module-attribute
INSTALL_TIMEOUT = '>>>>> Init Timed Out'
RESET_FAILED
module-attribute
RESET_FAILED = '>>>>> Reset Failed'
TESTS_ERROR
module-attribute
TESTS_ERROR = '>>>>> Tests Errored'
TESTS_FAILED
module-attribute
TESTS_FAILED = '>>>>> Some Tests Failed'
TESTS_PASSED
module-attribute
TESTS_PASSED = '>>>>> All Tests Passed'
TESTS_TIMEOUT
module-attribute
TESTS_TIMEOUT = '>>>>> Tests Timed Out'
START_TEST_OUTPUT
module-attribute
START_TEST_OUTPUT = '>>>>> Start Test Output'
END_TEST_OUTPUT
module-attribute
END_TEST_OUTPUT = '>>>>> End Test Output'
NON_TEST_EXTS
module-attribute
NON_TEST_EXTS = ['.json', '.png', 'csv', '.txt', '.md', '.jpg', '.jpeg', '.pkl', '.yml', '.yaml', '.toml']
SWE_BENCH_URL_RAW
module-attribute
SWE_BENCH_URL_RAW = 'https://raw.githubusercontent.com/'
DEFAULT_DOCKER_SPECS
module-attribute
DEFAULT_DOCKER_SPECS = {'conda_version': 'py311_23.11.0-2', 'node_version': '21.6.2', 'pnpm_version': '9.5.0', 'python_version': '3.9', 'ubuntu_version': '22.04'}
FAIL_ONLY_REPOS
module-attribute
FAIL_ONLY_REPOS = {'chartjs/Chart.js', 'processing/p5.js', 'markedjs/marked'}
MAP_REPO_VERSION_TO_SPECS
module-attribute
MAP_REPO_VERSION_TO_SPECS = {**MAP_REPO_VERSION_TO_SPECS_JS, **MAP_REPO_VERSION_TO_SPECS_PY}
MAP_REPO_TO_INSTALL
module-attribute
MAP_REPO_TO_INSTALL = {**MAP_REPO_TO_INSTALL_JS, **MAP_REPO_TO_INSTALL_PY}
MAP_REPO_TO_EXT
module-attribute
MAP_REPO_TO_EXT = {**{k: 'js' for k in MAP_REPO_VERSION_TO_SPECS_JS.keys()}, **{k: 'py' for k in MAP_REPO_VERSION_TO_SPECS_PY.keys()}}
LATEST
module-attribute
LATEST = 'latest'
SWEbenchInstance
Bases: TypedDict
repo
instance-attribute
repo: str
instance_id
instance-attribute
instance_id: str
base_commit
instance-attribute
base_commit: str
patch
instance-attribute
patch: str
test_patch
instance-attribute
test_patch: str
problem_statement
instance-attribute
problem_statement: str
hints_text
instance-attribute
hints_text: str
created_at
instance-attribute
created_at: str
version
instance-attribute
version: str
FAIL_TO_PASS
instance-attribute
FAIL_TO_PASS: str
PASS_TO_PASS
instance-attribute
PASS_TO_PASS: str
environment_setup_commit
instance-attribute
environment_setup_commit: str
ResolvedStatus
Bases: Enum
NO
class-attribute
instance-attribute
NO = 'RESOLVED_NO'
PARTIAL
class-attribute
instance-attribute
PARTIAL = 'RESOLVED_PARTIAL'
FULL
class-attribute
instance-attribute
FULL = 'RESOLVED_FULL'
TestStatus
Bases: Enum
FAILED
class-attribute
instance-attribute
FAILED = 'FAILED'
PASSED
class-attribute
instance-attribute
PASSED = 'PASSED'
SKIPPED
class-attribute
instance-attribute
SKIPPED = 'SKIPPED'
ERROR
class-attribute
instance-attribute
ERROR = 'ERROR'
XFAIL
class-attribute
instance-attribute
XFAIL = 'XFAIL'
EvalType
Bases: Enum
PASS_AND_FAIL
class-attribute
instance-attribute
PASS_AND_FAIL = 'pass_and_fail'
FAIL_ONLY
class-attribute
instance-attribute
FAIL_ONLY = 'fail_only'
PatchType
Bases: Enum
PATCH_GOLD
class-attribute
instance-attribute
PATCH_GOLD = 'gold'
PATCH_PRED
class-attribute
instance-attribute
PATCH_PRED = 'pred'
PATCH_PRED_TRY
class-attribute
instance-attribute
PATCH_PRED_TRY = 'pred_try'
PATCH_PRED_MINIMAL
class-attribute
instance-attribute
PATCH_PRED_MINIMAL = 'pred_minimal'
PATCH_PRED_MINIMAL_TRY
class-attribute
instance-attribute
PATCH_PRED_MINIMAL_TRY = 'pred_minimal_try'
PATCH_TEST
class-attribute
instance-attribute
PATCH_TEST = 'test'
__str__
__str__()
Source code in swebench/harness/constants/__init__.py
96 97 |
|
javascript
TEST_XVFB_PREFIX
module-attribute
TEST_XVFB_PREFIX = 'xvfb-run --server-args="-screen 0 1280x1024x24 -ac :99"'
XVFB_DEPS
module-attribute
XVFB_DEPS = ['python3', 'python3-pip', 'xvfb', 'x11-xkb-utils', 'xfonts-100dpi', 'xfonts-75dpi', 'xfonts-scalable', 'xfonts-cyrillic', 'x11-apps', 'firefox']
X11_DEPS
module-attribute
X11_DEPS = ['libx11-xcb1', 'libxcomposite1', 'libxcursor1', 'libxdamage1', 'libxi6', 'libxtst6', 'libnss3', 'libcups2', 'libxss1', 'libxrandr2', 'libasound2', 'libatk1.0-0', 'libgtk-3-0', 'x11-utils']
SPECS_CALYPSO
module-attribute
SPECS_CALYPSO = {**{k: {'apt-pkgs': ['libsass-dev', 'sassc'], 'install': ['npm install --unsafe-perm'], 'test_cmd': 'npm run test-client', 'docker_specs': {'node_version': k}} for k in ['0.8', '4.2.3', '4.3.0', '5.10.1', '5.11.1', '6.1.0', '6.7.0', '6.9.0', '6.9.1', '6.9.4', '6.10.0', '6.10.2', '6.10.3', '6.11.1', '6.11.2', '6.11.5', '8.9.1', '8.9.3', '8.9.4', '8.11.0', '8.11.2', '10.4.1', '10.5.0', '10.6.0', '10.9.0', '10.10.0', '10.12.0', '10.13.0', '10.14.0', '10.15.2', '10.16.3']}}
TEST_CHART_JS_TEMPLATE
module-attribute
TEST_CHART_JS_TEMPLATE = './node_modules/.bin/cross-env NODE_ENV=test ./node_modules/.bin/karma start {} --single-run --coverage --grep --auto-watch false'
SPECS_CHART_JS
module-attribute
SPECS_CHART_JS = {**{k: {'install': ['pnpm install', 'pnpm run build'], 'test_cmd': ['pnpm install', 'pnpm run build', f'{TEST_XVFB_PREFIX} su chromeuser -c "{TEST_CHART_JS_TEMPLATE.format('./karma.conf.cjs')}"'], 'docker_specs': {'node_version': '21.6.2', 'pnpm_version': '7.9.0', 'run_args': {'cap_add': ['SYS_ADMIN']}}} for k in ['4.0', '4.1', '4.2', '4.3', '4.4']}, **{k: {'install': ['npm install'], 'test_cmd': ['npm install', 'npm run build', f'{TEST_XVFB_PREFIX} su chromeuser -c "{TEST_CHART_JS_TEMPLATE.format('./karma.conf.js')}"'], 'docker_specs': {'node_version': '21.6.2', 'run_args': {'cap_add': ['SYS_ADMIN']}}} for k in ['3.0', '3.1', '3.2', '3.3', '3.4', '3.5', '3.6', '3.7', '3.8']}, **{k: {'install': ['npm install', 'npm install -g gulp-cli'], 'test_cmd': ['npm install', 'gulp build', TEST_XVFB_PREFIX + ' su chromeuser -c "gulp test"'], 'docker_specs': {'node_version': '21.6.2', 'run_args': {'cap_add': ['SYS_ADMIN']}}} for k in ['2.0', '2.1', '2.2', '2.3', '2.4', '2.5', '2.6', '2.7', '2.8', '2.9']}}
SPECS_MARKED
module-attribute
SPECS_MARKED = {**{k: {'install': ['npm install'], 'test_cmd': './node_modules/.bin/jasmine --no-color --config=jasmine.json', 'docker_specs': {'node_version': '12.22.12'}} for k in ['0.3', '0.5', '0.6', '0.7', '1.0', '1.1', '1.2', '2.0', '3.9', '4.0', '4.1', '5.0']}}
SPECS_P5_JS
module-attribute
SPECS_P5_JS = {**{k: {'apt-pkgs': X11_DEPS, 'install': ['npm install', "PUPPETEER_SKIP_CHROMIUM_DOWNLOAD='' node node_modules/puppeteer/install.js", './node_modules/.bin/grunt yui'], 'test_cmd': "sed -i 's/concurrency:[[:space:]]*[0-9][0-9]*/concurrency: 1/g' Gruntfile.js\nstdbuf -o 1M ./node_modules/.bin/grunt test --quiet --force", 'docker_specs': {'node_version': '14.17.3'}} for k in ['0.10', '0.2', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9', '1.0', '1.1', '1.2', '1.3', '1.4', '1.5', '1.6', '1.7', '1.8', '1.9']}}
SPECS_REACT_PDF
module-attribute
SPECS_REACT_PDF = {**{k: {'apt-pkgs': ['pkg-config', 'build-essential', 'libpixman-1-0', 'libpixman-1-dev', 'libcairo2-dev', 'libpango1.0-dev', 'libjpeg-dev', 'libgif-dev', 'librsvg2-dev'] + X11_DEPS, 'install': ['npm i -g yarn', 'yarn install'], 'test_cmd': 'NODE_OPTIONS="--experimental-vm-modules" ./node_modules/.bin/jest --no-color', 'docker_specs': {'node_version': '18.20.4'}} for k in ['1.0', '1.1', '1.2', '2.0']}}
MAP_REPO_VERSION_TO_SPECS_JS
module-attribute
MAP_REPO_VERSION_TO_SPECS_JS = {'Automattic/wp-calypso': SPECS_CALYPSO, 'chartjs/Chart.js': SPECS_CHART_JS, 'markedjs/marked': SPECS_MARKED, 'processing/p5.js': SPECS_P5_JS, 'diegomura/react-pdf': SPECS_REACT_PDF}
MAP_REPO_TO_INSTALL_JS
module-attribute
MAP_REPO_TO_INSTALL_JS = {}
python
TEST_ASTROPY_PYTEST
module-attribute
TEST_ASTROPY_PYTEST = 'pytest -rA -vv -o console_output_style=classic --tb=no'
TEST_DJANGO
module-attribute
TEST_DJANGO = './tests/runtests.py --verbosity 2 --settings=test_sqlite --parallel 1'
TEST_DJANGO_NO_PARALLEL
module-attribute
TEST_DJANGO_NO_PARALLEL = './tests/runtests.py --verbosity 2'
TEST_SEABORN
module-attribute
TEST_SEABORN = 'pytest --no-header -rA'
TEST_SEABORN_VERBOSE
module-attribute
TEST_SEABORN_VERBOSE = 'pytest -rA --tb=long'
TEST_PYTEST
module-attribute
TEST_PYTEST = 'pytest -rA'
TEST_PYTEST_VERBOSE
module-attribute
TEST_PYTEST_VERBOSE = 'pytest -rA --tb=long'
TEST_SPHINX
module-attribute
TEST_SPHINX = 'tox --current-env -epy39 -v --'
TEST_SYMPY
module-attribute
TEST_SYMPY = "PYTHONWARNINGS='ignore::UserWarning,ignore::SyntaxWarning' bin/test -C --verbose"
TEST_SYMPY_VERBOSE
module-attribute
TEST_SYMPY_VERBOSE = 'bin/test -C --verbose'
SPECS_SKLEARN
module-attribute
SPECS_SKLEARN = {k: {'python': '3.6', 'packages': 'numpy scipy cython pytest pandas matplotlib', 'install': 'python -m pip install -v --no-use-pep517 --no-build-isolation -e .', 'pip_packages': ['cython', 'numpy==1.19.2', 'setuptools', 'scipy==1.5.2'], 'test_cmd': TEST_PYTEST}for k in ['0.20', '0.21', '0.22']}
SPECS_FLASK
module-attribute
SPECS_FLASK = {'2.0': {'python': '3.9', 'packages': 'requirements.txt', 'install': 'python -m pip install -e .', 'pip_packages': ['setuptools==70.0.0', 'Werkzeug==2.3.7', 'Jinja2==3.0.1', 'itsdangerous==2.1.2', 'click==8.0.1', 'MarkupSafe==2.1.3'], 'test_cmd': TEST_PYTEST}, '2.1': {'python': '3.10', 'packages': 'requirements.txt', 'install': 'python -m pip install -e .', 'pip_packages': ['setuptools==70.0.0', 'click==8.1.3', 'itsdangerous==2.1.2', 'Jinja2==3.1.2', 'MarkupSafe==2.1.1', 'Werkzeug==2.3.7'], 'test_cmd': TEST_PYTEST}}
SPECS_DJANGO
module-attribute
SPECS_DJANGO = {k: {'python': '3.5', 'packages': 'requirements.txt', 'pre_install': ['apt-get update && apt-get install -y locales', "echo 'en_US UTF-8' > /etc/locale.gen", 'locale-gen en_US.UTF-8'], 'install': 'python setup.py install', 'pip_packages': ['setuptools'], 'eval_commands': ['export LANG=en_US.UTF-8', 'export LC_ALL=en_US.UTF-8', 'export PYTHONIOENCODING=utf8', 'export LANGUAGE=en_US:en'], 'test_cmd': TEST_DJANGO}for k in ['1.7', '1.8', '1.9', '1.10', '1.11', '2.0', '2.1', '2.2']}
SPECS_REQUESTS
module-attribute
SPECS_REQUESTS = {k: {'python': '3.9', 'packages': 'pytest', 'install': 'python -m pip install .', 'test_cmd': TEST_PYTEST}for k in ['0.7', '0.8', '0.9', '0.11', '0.13', '0.14', '1.1', '1.2', '2.0', '2.2'] + ['2.3', '2.4', '2.5', '2.7', '2.8', '2.9', '2.10', '2.11', '2.12', '2.17'] + ['2.18', '2.19', '2.22', '2.26', '2.25', '2.27', '2.31', '3.0']}
SPECS_SEABORN
module-attribute
SPECS_SEABORN = {k: {'python': '3.9', 'install': 'python -m pip install -e .', 'pip_packages': ['contourpy==1.1.0', 'cycler==0.11.0', 'fonttools==4.42.1', 'importlib-resources==6.0.1', 'kiwisolver==1.4.5', 'matplotlib==3.7.2', 'numpy==1.25.2', 'packaging==23.1', 'pandas==1.3.5', 'pillow==10.0.0', 'pyparsing==3.0.9', 'pytest', 'python-dateutil==2.8.2', 'pytz==2023.3.post1', 'scipy==1.11.2', 'six==1.16.0', 'tzdata==2023.1', 'zipp==3.16.2'], 'test_cmd': TEST_SEABORN}for k in ['0.11']}
SPECS_PYTEST
module-attribute
SPECS_PYTEST = {k: {'python': '3.9', 'install': 'python -m pip install -e .', 'test_cmd': TEST_PYTEST}for k in ['4.4', '4.5', '4.6', '5.0', '5.1', '5.2', '5.3', '5.4', '6.0', '6.2', '6.3', '7.0', '7.1', '7.2', '7.4', '8.0', '8.1', '8.2', '8.3', '8.4']}
SPECS_MATPLOTLIB
module-attribute
SPECS_MATPLOTLIB = {k: {'python': '3.11', 'packages': 'environment.yml', 'install': 'python -m pip install -e .', 'pre_install': ['apt-get -y update && apt-get -y upgrade && DEBIAN_FRONTEND=noninteractive apt-get install -y imagemagick ffmpeg texlive texlive-latex-extra texlive-fonts-recommended texlive-xetex texlive-luatex cm-super dvipng', 'QHULL_URL="http://www.qhull.org/download/qhull-2020-src-8.0.2.tgz"', 'QHULL_TAR="/tmp/qhull-2020-src-8.0.2.tgz"', 'QHULL_BUILD_DIR="/testbed/build"', 'wget -O "$QHULL_TAR" "$QHULL_URL"', 'mkdir -p "$QHULL_BUILD_DIR"', 'tar -xvzf "$QHULL_TAR" -C "$QHULL_BUILD_DIR"'], 'pip_packages': ['contourpy==1.1.0', 'cycler==0.11.0', 'fonttools==4.42.1', 'ghostscript', 'kiwisolver==1.4.5', 'numpy==1.25.2', 'packaging==23.1', 'pillow==10.0.0', 'pikepdf', 'pyparsing==3.0.9', 'python-dateutil==2.8.2', 'six==1.16.0', 'setuptools==68.1.2', 'setuptools-scm==7.1.0', 'typing-extensions==4.7.1'], 'test_cmd': TEST_PYTEST}for k in ['3.5', '3.6', '3.7', '3.8', '3.9']}
SPECS_SPHINX
module-attribute
SPECS_SPHINX = {k: {'python': '3.9', 'pip_packages': ['tox==4.16.0', 'tox-current-env==0.0.11', 'Jinja2==3.0.3'], 'install': 'python -m pip install -e .[test]', 'pre_install': ["sed -i 's/pytest/pytest -rA/' tox.ini"], 'test_cmd': TEST_SPHINX}for k in ['1.5', '1.6', '1.7', '1.8', '2.0', '2.1', '2.2', '2.3', '2.4', '3.0'] + ['3.1', '3.2', '3.3', '3.4', '3.5', '4.0', '4.1', '4.2', '4.3', '4.4'] + ['4.5', '5.0', '5.1', '5.2', '5.3', '6.0', '6.2', '7.0', '7.1', '7.2'] + ['7.3', '7.4', '8.0', '8.1']}
SPECS_ASTROPY
module-attribute
SPECS_ASTROPY = {k: {'python': '3.9', 'install': 'python -m pip install -e .[test] --verbose', 'pip_packages': ['attrs==23.1.0', 'exceptiongroup==1.1.3', 'execnet==2.0.2', 'hypothesis==6.82.6', 'iniconfig==2.0.0', 'numpy==1.25.2', 'packaging==23.1', 'pluggy==1.3.0', 'psutil==5.9.5', 'pyerfa==2.0.0.3', 'pytest-arraydiff==0.5.0', 'pytest-astropy-header==0.2.2', 'pytest-astropy==0.10.0', 'pytest-cov==4.1.0', 'pytest-doctestplus==1.0.0', 'pytest-filter-subpackage==0.1.2', 'pytest-mock==3.11.1', 'pytest-openfiles==0.5.0', 'pytest-remotedata==0.4.0', 'pytest-xdist==3.3.1', 'pytest==7.4.0', 'PyYAML==6.0.1', 'setuptools==68.0.0', 'sortedcontainers==2.4.0', 'tomli==2.0.1'], 'test_cmd': TEST_PYTEST}for k in ['3.0', '3.1', '3.2', '4.1', '4.2', '4.3', '5.0', '5.1', '5.2', 'v5.3']}
SPECS_SYMPY
module-attribute
SPECS_SYMPY = {k: {'python': '3.9', 'packages': 'mpmath flake8', 'pip_packages': ['mpmath==1.3.0', 'flake8-comprehensions'], 'install': 'python -m pip install -e .', 'test_cmd': TEST_SYMPY}for k in ['0.7', '1.0', '1.1', '1.10', '1.11', '1.12', '1.2', '1.4', '1.5', '1.6'] + ['1.7', '1.8', '1.9'] + ['1.10', '1.11', '1.12', '1.13', '1.14']}
SPECS_PYLINT
module-attribute
SPECS_PYLINT = {k: {'python': '3.9', 'packages': 'requirements.txt', 'install': 'python -m pip install -e .', 'test_cmd': TEST_PYTEST}for k in ['2.10', '2.11', '2.13', '2.14', '2.15', '2.16', '2.17', '2.8', '2.9', '3.0', '3.1', '3.2', '3.3', '4.0']}
SPECS_XARRAY
module-attribute
SPECS_XARRAY = {k: {'python': '3.10', 'packages': 'environment.yml', 'install': 'python -m pip install -e .', 'pip_packages': ['numpy==1.23.0', 'packaging==23.1', 'pandas==1.5.3', 'pytest==7.4.0', 'python-dateutil==2.8.2', 'pytz==2023.3', 'six==1.16.0', 'scipy==1.11.1', 'setuptools==68.0.0', 'dask==2022.8.1'], 'no_use_env': True, 'test_cmd': TEST_PYTEST}for k in ['0.12', '0.18', '0.19', '0.20', '2022.03', '2022.06', '2022.09', '2023.07', '2024.05']}
SPECS_SQLFLUFF
module-attribute
SPECS_SQLFLUFF = {k: {'python': '3.9', 'packages': 'requirements.txt', 'install': 'python -m pip install -e .', 'test_cmd': TEST_PYTEST}for k in ['0.10', '0.11', '0.12', '0.13', '0.4', '0.5', '0.6', '0.8', '0.9', '1.0', '1.1', '1.2', '1.3', '1.4', '2.0', '2.1', '2.2']}
SPECS_DBT_CORE
module-attribute
SPECS_DBT_CORE = {k: {'python': '3.9', 'packages': 'requirements.txt', 'install': 'python -m pip install -e .'}for k in ['0.13', '0.14', '0.15', '0.16', '0.17', '0.18', '0.19', '0.20', '0.21', '1.0', '1.1', '1.2', '1.3', '1.4', '1.5', '1.6', '1.7']}
SPECS_PYVISTA
module-attribute
SPECS_PYVISTA = {k: {'python': '3.9', 'install': 'python -m pip install -e .', 'pip_packages': ['pytest'], 'test_cmd': TEST_PYTEST}for k in ['0.20', '0.21', '0.22', '0.23']}
SPECS_ASTROID
module-attribute
SPECS_ASTROID = {k: {'python': '3.9', 'install': 'python -m pip install -e .', 'pip_packages': ['pytest'], 'test_cmd': TEST_PYTEST}for k in ['2.10', '2.12', '2.13', '2.14', '2.15', '2.16', '2.5', '2.6', '2.7', '2.8', '2.9', '3.0']}
SPECS_MARSHMALLOW
module-attribute
SPECS_MARSHMALLOW = {k: {'python': '3.9', 'install': "python -m pip install -e '.[dev]'", 'test_cmd': TEST_PYTEST}for k in ['2.18', '2.19', '2.20', '3.0', '3.1', '3.10', '3.11', '3.12', '3.13', '3.15', '3.16', '3.19', '3.2', '3.4', '3.8', '3.9']}
SPECS_PVLIB
module-attribute
SPECS_PVLIB = {k: {'python': '3.9', 'install': 'python -m pip install -e .[all]', 'packages': 'pandas scipy', 'pip_packages': ['jupyter', 'ipython', 'matplotlib', 'pytest', 'flake8'], 'test_cmd': TEST_PYTEST}for k in ['0.1', '0.2', '0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9']}
SPECS_PYDICOM
module-attribute
SPECS_PYDICOM = {k: {'python': '3.6', 'install': 'python -m pip install -e .', 'packages': 'numpy', 'pip_packages': ['pytest'], 'test_cmd': TEST_PYTEST}for k in ['1.0', '1.1', '1.2', '1.3', '1.4', '2.0', '2.1', '2.2', '2.3', '2.4', '3.0']}
SPECS_HUMANEVAL
module-attribute
SPECS_HUMANEVAL = {k: {'python': '3.9', 'test_cmd': 'python'}for k in ['1.0']}
MAP_REPO_VERSION_TO_SPECS_PY
module-attribute
MAP_REPO_VERSION_TO_SPECS_PY = {'astropy/astropy': SPECS_ASTROPY, 'dbt-labs/dbt-core': SPECS_DBT_CORE, 'django/django': SPECS_DJANGO, 'matplotlib/matplotlib': SPECS_MATPLOTLIB, 'marshmallow-code/marshmallow': SPECS_MARSHMALLOW, 'mwaskom/seaborn': SPECS_SEABORN, 'pallets/flask': SPECS_FLASK, 'psf/requests': SPECS_REQUESTS, 'pvlib/pvlib-python': SPECS_PVLIB, 'pydata/xarray': SPECS_XARRAY, 'pydicom/pydicom': SPECS_PYDICOM, 'pylint-dev/astroid': SPECS_ASTROID, 'pylint-dev/pylint': SPECS_PYLINT, 'pytest-dev/pytest': SPECS_PYTEST, 'pyvista/pyvista': SPECS_PYVISTA, 'scikit-learn/scikit-learn': SPECS_SKLEARN, 'sphinx-doc/sphinx': SPECS_SPHINX, 'sqlfluff/sqlfluff': SPECS_SQLFLUFF, 'swe-bench/humaneval': SPECS_HUMANEVAL, 'sympy/sympy': SPECS_SYMPY}
MAP_REPO_TO_INSTALL_PY
module-attribute
MAP_REPO_TO_INSTALL_PY = {}
MAP_REPO_TO_REQS_PATHS
module-attribute
MAP_REPO_TO_REQS_PATHS = {'dbt-labs/dbt-core': ['dev-requirements.txt', 'dev_requirements.txt'], 'django/django': ['tests/requirements/py3.txt'], 'matplotlib/matplotlib': ['requirements/dev/dev-requirements.txt', 'requirements/testing/travis_all.txt'], 'pallets/flask': ['requirements/dev.txt'], 'pylint-dev/pylint': ['requirements_test.txt'], 'pyvista/pyvista': ['requirements_test.txt', 'requirements.txt'], 'sqlfluff/sqlfluff': ['requirements_dev.txt'], 'sympy/sympy': ['requirements-dev.txt', 'requirements-test.txt']}
MAP_REPO_TO_ENV_YML_PATHS
module-attribute
MAP_REPO_TO_ENV_YML_PATHS = {'matplotlib/matplotlib': ['environment.yml'], 'pydata/xarray': ['ci/requirements/environment.yml', 'environment.yml']}
USE_X86_PY
module-attribute
USE_X86_PY = {'astropy__astropy-7973', 'django__django-10087', 'django__django-10097', 'django__django-10213', 'django__django-10301', 'django__django-10316', 'django__django-10426', 'django__django-11383', 'django__django-12185', 'django__django-12497', 'django__django-13121', 'django__django-13417', 'django__django-13431', 'django__django-13447', 'django__django-14155', 'django__django-14164', 'django__django-14169', 'django__django-14170', 'django__django-15180', 'django__django-15199', 'django__django-15280', 'django__django-15292', 'django__django-15474', 'django__django-15682', 'django__django-15689', 'django__django-15695', 'django__django-15698', 'django__django-15781', 'django__django-15925', 'django__django-15930', 'django__django-5158', 'django__django-5470', 'django__django-7188', 'django__django-7475', 'django__django-7530', 'django__django-8326', 'django__django-8961', 'django__django-9003', 'django__django-9703', 'django__django-9871', 'matplotlib__matplotlib-13983', 'matplotlib__matplotlib-13984', 'matplotlib__matplotlib-13989', 'matplotlib__matplotlib-14043', 'matplotlib__matplotlib-14471', 'matplotlib__matplotlib-22711', 'matplotlib__matplotlib-22719', 'matplotlib__matplotlib-22734', 'matplotlib__matplotlib-22767', 'matplotlib__matplotlib-22815', 'matplotlib__matplotlib-22835', 'matplotlib__matplotlib-22865', 'matplotlib__matplotlib-22871', 'matplotlib__matplotlib-22883', 'matplotlib__matplotlib-22926', 'matplotlib__matplotlib-22929', 'matplotlib__matplotlib-22931', 'matplotlib__matplotlib-22945', 'matplotlib__matplotlib-22991', 'matplotlib__matplotlib-23031', 'matplotlib__matplotlib-23047', 'matplotlib__matplotlib-23049', 'matplotlib__matplotlib-23057', 'matplotlib__matplotlib-23088', 'matplotlib__matplotlib-23111', 'matplotlib__matplotlib-23140', 'matplotlib__matplotlib-23174', 'matplotlib__matplotlib-23188', 'matplotlib__matplotlib-23198', 'matplotlib__matplotlib-23203', 'matplotlib__matplotlib-23266', 'matplotlib__matplotlib-23267', 
'matplotlib__matplotlib-23288', 'matplotlib__matplotlib-23299', 'matplotlib__matplotlib-23314', 'matplotlib__matplotlib-23332', 'matplotlib__matplotlib-23348', 'matplotlib__matplotlib-23412', 'matplotlib__matplotlib-23476', 'matplotlib__matplotlib-23516', 'matplotlib__matplotlib-23562', 'matplotlib__matplotlib-23563', 'matplotlib__matplotlib-23573', 'matplotlib__matplotlib-23740', 'matplotlib__matplotlib-23742', 'matplotlib__matplotlib-23913', 'matplotlib__matplotlib-23964', 'matplotlib__matplotlib-23987', 'matplotlib__matplotlib-24013', 'matplotlib__matplotlib-24026', 'matplotlib__matplotlib-24088', 'matplotlib__matplotlib-24111', 'matplotlib__matplotlib-24149', 'matplotlib__matplotlib-24177', 'matplotlib__matplotlib-24189', 'matplotlib__matplotlib-24224', 'matplotlib__matplotlib-24250', 'matplotlib__matplotlib-24257', 'matplotlib__matplotlib-24265', 'matplotlib__matplotlib-24334', 'matplotlib__matplotlib-24362', 'matplotlib__matplotlib-24403', 'matplotlib__matplotlib-24431', 'matplotlib__matplotlib-24538', 'matplotlib__matplotlib-24570', 'matplotlib__matplotlib-24604', 'matplotlib__matplotlib-24619', 'matplotlib__matplotlib-24627', 'matplotlib__matplotlib-24637', 'matplotlib__matplotlib-24691', 'matplotlib__matplotlib-24749', 'matplotlib__matplotlib-24768', 'matplotlib__matplotlib-24849', 'matplotlib__matplotlib-24870', 'matplotlib__matplotlib-24912', 'matplotlib__matplotlib-24924', 'matplotlib__matplotlib-24970', 'matplotlib__matplotlib-24971', 'matplotlib__matplotlib-25027', 'matplotlib__matplotlib-25052', 'matplotlib__matplotlib-25079', 'matplotlib__matplotlib-25085', 'matplotlib__matplotlib-25122', 'matplotlib__matplotlib-25126', 'matplotlib__matplotlib-25129', 'matplotlib__matplotlib-25238', 'matplotlib__matplotlib-25281', 'matplotlib__matplotlib-25287', 'matplotlib__matplotlib-25311', 'matplotlib__matplotlib-25332', 'matplotlib__matplotlib-25334', 'matplotlib__matplotlib-25340', 'matplotlib__matplotlib-25346', 'matplotlib__matplotlib-25404', 
'matplotlib__matplotlib-25405', 'matplotlib__matplotlib-25425', 'matplotlib__matplotlib-25430', 'matplotlib__matplotlib-25433', 'matplotlib__matplotlib-25442', 'matplotlib__matplotlib-25479', 'matplotlib__matplotlib-25498', 'matplotlib__matplotlib-25499', 'matplotlib__matplotlib-25515', 'matplotlib__matplotlib-25547', 'matplotlib__matplotlib-25551', 'matplotlib__matplotlib-25565', 'matplotlib__matplotlib-25624', 'matplotlib__matplotlib-25631', 'matplotlib__matplotlib-25640', 'matplotlib__matplotlib-25651', 'matplotlib__matplotlib-25667', 'matplotlib__matplotlib-25712', 'matplotlib__matplotlib-25746', 'matplotlib__matplotlib-25772', 'matplotlib__matplotlib-25775', 'matplotlib__matplotlib-25779', 'matplotlib__matplotlib-25785', 'matplotlib__matplotlib-25794', 'matplotlib__matplotlib-25859', 'matplotlib__matplotlib-25960', 'matplotlib__matplotlib-26011', 'matplotlib__matplotlib-26020', 'matplotlib__matplotlib-26024', 'matplotlib__matplotlib-26078', 'matplotlib__matplotlib-26089', 'matplotlib__matplotlib-26101', 'matplotlib__matplotlib-26113', 'matplotlib__matplotlib-26122', 'matplotlib__matplotlib-26160', 'matplotlib__matplotlib-26184', 'matplotlib__matplotlib-26208', 'matplotlib__matplotlib-26223', 'matplotlib__matplotlib-26232', 'matplotlib__matplotlib-26249', 'matplotlib__matplotlib-26278', 'matplotlib__matplotlib-26285', 'matplotlib__matplotlib-26291', 'matplotlib__matplotlib-26300', 'matplotlib__matplotlib-26311', 'matplotlib__matplotlib-26341', 'matplotlib__matplotlib-26342', 'matplotlib__matplotlib-26399', 'matplotlib__matplotlib-26466', 'matplotlib__matplotlib-26469', 'matplotlib__matplotlib-26472', 'matplotlib__matplotlib-26479', 'matplotlib__matplotlib-26532', 'pydata__xarray-2905', 'pydata__xarray-2922', 'pydata__xarray-3095', 'pydata__xarray-3114', 'pydata__xarray-3151', 'pydata__xarray-3156', 'pydata__xarray-3159', 'pydata__xarray-3239', 'pydata__xarray-3302', 'pydata__xarray-3305', 'pydata__xarray-3338', 'pydata__xarray-3364', 'pydata__xarray-3406', 
'pydata__xarray-3520', 'pydata__xarray-3527', 'pydata__xarray-3631', 'pydata__xarray-3635', 'pydata__xarray-3637', 'pydata__xarray-3649', 'pydata__xarray-3677', 'pydata__xarray-3733', 'pydata__xarray-3812', 'pydata__xarray-3905', 'pydata__xarray-3976', 'pydata__xarray-3979', 'pydata__xarray-3993', 'pydata__xarray-4075', 'pydata__xarray-4094', 'pydata__xarray-4098', 'pydata__xarray-4182', 'pydata__xarray-4184', 'pydata__xarray-4248', 'pydata__xarray-4339', 'pydata__xarray-4356', 'pydata__xarray-4419', 'pydata__xarray-4423', 'pydata__xarray-4442', 'pydata__xarray-4493', 'pydata__xarray-4510', 'pydata__xarray-4629', 'pydata__xarray-4683', 'pydata__xarray-4684', 'pydata__xarray-4687', 'pydata__xarray-4695', 'pydata__xarray-4750', 'pydata__xarray-4758', 'pydata__xarray-4759', 'pydata__xarray-4767', 'pydata__xarray-4802', 'pydata__xarray-4819', 'pydata__xarray-4827', 'pydata__xarray-4879', 'pydata__xarray-4911', 'pydata__xarray-4939', 'pydata__xarray-4940', 'pydata__xarray-4966', 'pydata__xarray-4994', 'pydata__xarray-5033', 'pydata__xarray-5126', 'pydata__xarray-5131', 'pydata__xarray-5180', 'pydata__xarray-5187', 'pydata__xarray-5233', 'pydata__xarray-5362', 'pydata__xarray-5365', 'pydata__xarray-5455', 'pydata__xarray-5580', 'pydata__xarray-5662', 'pydata__xarray-5682', 'pydata__xarray-5731', 'pydata__xarray-6135', 'pydata__xarray-6386', 'pydata__xarray-6394', 'pydata__xarray-6400', 'pydata__xarray-6461', 'pydata__xarray-6548', 'pydata__xarray-6598', 'pydata__xarray-6599', 'pydata__xarray-6601', 'pydata__xarray-6721', 'pydata__xarray-6744', 'pydata__xarray-6798', 'pydata__xarray-6804', 'pydata__xarray-6823', 'pydata__xarray-6857', 'pydata__xarray-6882', 'pydata__xarray-6889', 'pydata__xarray-6938', 'pydata__xarray-6971', 'pydata__xarray-6992', 'pydata__xarray-6999', 'pydata__xarray-7003', 'pydata__xarray-7019', 'pydata__xarray-7052', 'pydata__xarray-7089', 'pydata__xarray-7101', 'pydata__xarray-7105', 'pydata__xarray-7112', 'pydata__xarray-7120', 
'pydata__xarray-7147', 'pydata__xarray-7150', 'pydata__xarray-7179', 'pydata__xarray-7203', 'pydata__xarray-7229', 'pydata__xarray-7233', 'pydata__xarray-7347', 'pydata__xarray-7391', 'pydata__xarray-7393', 'pydata__xarray-7400', 'pydata__xarray-7444', 'pytest-dev__pytest-10482', 'scikit-learn__scikit-learn-10198', 'scikit-learn__scikit-learn-10297', 'scikit-learn__scikit-learn-10306', 'scikit-learn__scikit-learn-10331', 'scikit-learn__scikit-learn-10377', 'scikit-learn__scikit-learn-10382', 'scikit-learn__scikit-learn-10397', 'scikit-learn__scikit-learn-10427', 'scikit-learn__scikit-learn-10428', 'scikit-learn__scikit-learn-10443', 'scikit-learn__scikit-learn-10452', 'scikit-learn__scikit-learn-10459', 'scikit-learn__scikit-learn-10471', 'scikit-learn__scikit-learn-10483', 'scikit-learn__scikit-learn-10495', 'scikit-learn__scikit-learn-10508', 'scikit-learn__scikit-learn-10558', 'scikit-learn__scikit-learn-10577', 'scikit-learn__scikit-learn-10581', 'scikit-learn__scikit-learn-10687', 'scikit-learn__scikit-learn-10774', 'scikit-learn__scikit-learn-10777', 'scikit-learn__scikit-learn-10803', 'scikit-learn__scikit-learn-10844', 'scikit-learn__scikit-learn-10870', 'scikit-learn__scikit-learn-10881', 'scikit-learn__scikit-learn-10899', 'scikit-learn__scikit-learn-10908', 'scikit-learn__scikit-learn-10913', 'scikit-learn__scikit-learn-10949', 'scikit-learn__scikit-learn-10982', 'scikit-learn__scikit-learn-10986', 'scikit-learn__scikit-learn-11040', 'scikit-learn__scikit-learn-11042', 'scikit-learn__scikit-learn-11043', 'scikit-learn__scikit-learn-11151', 'scikit-learn__scikit-learn-11160', 'scikit-learn__scikit-learn-11206', 'scikit-learn__scikit-learn-11235', 'scikit-learn__scikit-learn-11243', 'scikit-learn__scikit-learn-11264', 'scikit-learn__scikit-learn-11281', 'scikit-learn__scikit-learn-11310', 'scikit-learn__scikit-learn-11315', 'scikit-learn__scikit-learn-11333', 'scikit-learn__scikit-learn-11346', 'scikit-learn__scikit-learn-11391', 
'scikit-learn__scikit-learn-11496', 'scikit-learn__scikit-learn-11542', 'scikit-learn__scikit-learn-11574', 'scikit-learn__scikit-learn-11578', 'scikit-learn__scikit-learn-11585', 'scikit-learn__scikit-learn-11596', 'scikit-learn__scikit-learn-11635', 'scikit-learn__scikit-learn-12258', 'scikit-learn__scikit-learn-12421', 'scikit-learn__scikit-learn-12443', 'scikit-learn__scikit-learn-12462', 'scikit-learn__scikit-learn-12471', 'scikit-learn__scikit-learn-12486', 'scikit-learn__scikit-learn-12557', 'scikit-learn__scikit-learn-12583', 'scikit-learn__scikit-learn-12585', 'scikit-learn__scikit-learn-12625', 'scikit-learn__scikit-learn-12626', 'scikit-learn__scikit-learn-12656', 'scikit-learn__scikit-learn-12682', 'scikit-learn__scikit-learn-12704', 'scikit-learn__scikit-learn-12733', 'scikit-learn__scikit-learn-12758', 'scikit-learn__scikit-learn-12760', 'scikit-learn__scikit-learn-12784', 'scikit-learn__scikit-learn-12827', 'scikit-learn__scikit-learn-12834', 'scikit-learn__scikit-learn-12860', 'scikit-learn__scikit-learn-12908', 'scikit-learn__scikit-learn-12938', 'scikit-learn__scikit-learn-12961', 'scikit-learn__scikit-learn-12973', 'scikit-learn__scikit-learn-12983', 'scikit-learn__scikit-learn-12989', 'scikit-learn__scikit-learn-13010', 'scikit-learn__scikit-learn-13013', 'scikit-learn__scikit-learn-13017', 'scikit-learn__scikit-learn-13046', 'scikit-learn__scikit-learn-13087', 'scikit-learn__scikit-learn-13124', 'scikit-learn__scikit-learn-13135', 'scikit-learn__scikit-learn-13142', 'scikit-learn__scikit-learn-13143', 'scikit-learn__scikit-learn-13157', 'scikit-learn__scikit-learn-13165', 'scikit-learn__scikit-learn-13174', 'scikit-learn__scikit-learn-13221', 'scikit-learn__scikit-learn-13241', 'scikit-learn__scikit-learn-13253', 'scikit-learn__scikit-learn-13280', 'scikit-learn__scikit-learn-13283', 'scikit-learn__scikit-learn-13302', 'scikit-learn__scikit-learn-13313', 'scikit-learn__scikit-learn-13328', 'scikit-learn__scikit-learn-13333', 
'scikit-learn__scikit-learn-13363', 'scikit-learn__scikit-learn-13368', 'scikit-learn__scikit-learn-13392', 'scikit-learn__scikit-learn-13436', 'scikit-learn__scikit-learn-13439', 'scikit-learn__scikit-learn-13447', 'scikit-learn__scikit-learn-13454', 'scikit-learn__scikit-learn-13467', 'scikit-learn__scikit-learn-13472', 'scikit-learn__scikit-learn-13485', 'scikit-learn__scikit-learn-13496', 'scikit-learn__scikit-learn-13497', 'scikit-learn__scikit-learn-13536', 'scikit-learn__scikit-learn-13549', 'scikit-learn__scikit-learn-13554', 'scikit-learn__scikit-learn-13584', 'scikit-learn__scikit-learn-13618', 'scikit-learn__scikit-learn-13620', 'scikit-learn__scikit-learn-13628', 'scikit-learn__scikit-learn-13641', 'scikit-learn__scikit-learn-13704', 'scikit-learn__scikit-learn-13726', 'scikit-learn__scikit-learn-13779', 'scikit-learn__scikit-learn-13780', 'scikit-learn__scikit-learn-13828', 'scikit-learn__scikit-learn-13864', 'scikit-learn__scikit-learn-13877', 'scikit-learn__scikit-learn-13910', 'scikit-learn__scikit-learn-13915', 'scikit-learn__scikit-learn-13933', 'scikit-learn__scikit-learn-13960', 'scikit-learn__scikit-learn-13974', 'scikit-learn__scikit-learn-13983', 'scikit-learn__scikit-learn-14012', 'scikit-learn__scikit-learn-14024', 'scikit-learn__scikit-learn-14053', 'scikit-learn__scikit-learn-14067', 'scikit-learn__scikit-learn-14087', 'scikit-learn__scikit-learn-14092', 'scikit-learn__scikit-learn-14114', 'scikit-learn__scikit-learn-14125', 'scikit-learn__scikit-learn-14141', 'scikit-learn__scikit-learn-14237', 'scikit-learn__scikit-learn-14309', 'scikit-learn__scikit-learn-14430', 'scikit-learn__scikit-learn-14450', 'scikit-learn__scikit-learn-14458', 'scikit-learn__scikit-learn-14464', 'scikit-learn__scikit-learn-14496', 'scikit-learn__scikit-learn-14520', 'scikit-learn__scikit-learn-14544', 'scikit-learn__scikit-learn-14591', 'scikit-learn__scikit-learn-14629', 'scikit-learn__scikit-learn-14704', 'scikit-learn__scikit-learn-14706', 
'scikit-learn__scikit-learn-14710', 'scikit-learn__scikit-learn-14732', 'scikit-learn__scikit-learn-14764', 'scikit-learn__scikit-learn-14806', 'scikit-learn__scikit-learn-14869', 'scikit-learn__scikit-learn-14878', 'scikit-learn__scikit-learn-14890', 'scikit-learn__scikit-learn-14894', 'scikit-learn__scikit-learn-14898', 'scikit-learn__scikit-learn-14908', 'scikit-learn__scikit-learn-14983', 'scikit-learn__scikit-learn-14999', 'scikit-learn__scikit-learn-15028', 'scikit-learn__scikit-learn-15084', 'scikit-learn__scikit-learn-15086', 'scikit-learn__scikit-learn-15094', 'scikit-learn__scikit-learn-15096', 'scikit-learn__scikit-learn-15100', 'scikit-learn__scikit-learn-15119', 'scikit-learn__scikit-learn-15120', 'scikit-learn__scikit-learn-15138', 'scikit-learn__scikit-learn-15393', 'scikit-learn__scikit-learn-15495', 'scikit-learn__scikit-learn-15512', 'scikit-learn__scikit-learn-15524', 'scikit-learn__scikit-learn-15535', 'scikit-learn__scikit-learn-15625', 'scikit-learn__scikit-learn-3840', 'scikit-learn__scikit-learn-7760', 'scikit-learn__scikit-learn-8554', 'scikit-learn__scikit-learn-9274', 'scikit-learn__scikit-learn-9288', 'scikit-learn__scikit-learn-9304', 'scikit-learn__scikit-learn-9775', 'scikit-learn__scikit-learn-9939', 'sphinx-doc__sphinx-11311', 'sphinx-doc__sphinx-7910', 'sympy__sympy-12812', 'sympy__sympy-14248', 'sympy__sympy-15222', 'sympy__sympy-19201'}
docker_build
BuildImageError
BuildImageError(image_name, message, logger)
Bases: Exception
Source code in swebench/harness/docker_build.py
28 29 30 31 32 33 |
|
super_str
instance-attribute
super_str = __str__()
image_name
instance-attribute
image_name = image_name
log_path
instance-attribute
log_path = log_file
logger
instance-attribute
logger = logger
__str__
__str__()
Source code in swebench/harness/docker_build.py
35 36 37 38 39 |
|
setup_logger
setup_logger(instance_id: str, log_file: Path, mode='w', add_stdout: bool = False)
This logger is used for logging the build process of images and containers. It writes logs to the log file.
If add_stdout is True, logs will also be sent to stdout, which can be used for streaming ephemeral output from Modal containers.
Source code in swebench/harness/docker_build.py
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
|
close_logger
close_logger(logger)
Source code in swebench/harness/docker_build.py
69 70 71 72 73 |
|
build_image
build_image(image_name: str, setup_scripts: dict, dockerfile: str, platform: str, client: DockerClient, build_dir: Path, nocache: bool = False)
Builds a docker image with the given name, setup scripts, dockerfile, and platform.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
image_name
|
str
|
Name of the image to build |
required |
setup_scripts
|
dict
|
Dictionary of setup script names to setup script contents |
required |
dockerfile
|
str
|
Contents of the Dockerfile |
required |
platform
|
str
|
Platform to build the image for |
required |
client
|
DockerClient
|
Docker client to use for building the image |
required |
build_dir
|
Path
|
Directory for the build context (will also contain logs, scripts, and artifacts) |
required |
nocache
|
bool
|
Whether to bypass the cache when building (True disables the build cache) |
False
|
Source code in swebench/harness/docker_build.py
76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
|
build_base_images
build_base_images(client: DockerClient, dataset: list, force_rebuild: bool = False)
Builds the base images required for the dataset if they do not already exist.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
client
|
DockerClient
|
Docker client to use for building the images |
required |
dataset
|
list
|
List of test specs or dataset to build images for |
required |
force_rebuild
|
bool
|
Whether to force rebuild the images even if they already exist |
False
|
Source code in swebench/harness/docker_build.py
164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 |
|
get_env_configs_to_build
get_env_configs_to_build(client: DockerClient, dataset: list)
Returns a dictionary of image names to build scripts and dockerfiles for environment images. Returns only the environment images that need to be built.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
client
|
DockerClient
|
Docker client to use for building the images |
required |
dataset
|
list
|
List of test specs or dataset to build images for |
required |
Source code in swebench/harness/docker_build.py
207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 |
|
build_env_images
build_env_images(client: DockerClient, dataset: list, force_rebuild: bool = False, max_workers: int = 4)
Builds the environment images required for the dataset if they do not already exist.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
client
|
DockerClient
|
Docker client to use for building the images |
required |
dataset
|
list
|
List of test specs or dataset to build images for |
required |
force_rebuild
|
bool
|
Whether to force rebuild the images even if they already exist |
False
|
max_workers
|
int
|
Maximum number of workers to use for building images |
4
|
Source code in swebench/harness/docker_build.py
254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 |
|
build_instance_images
build_instance_images(client: DockerClient, dataset: list, force_rebuild: bool = False, max_workers: int = 4, namespace: str = None, tag: str = None)
Builds the instance images required for the dataset if they do not already exist.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataset
|
list
|
List of test specs or dataset to build images for |
required |
client
|
DockerClient
|
Docker client to use for building the images |
required |
force_rebuild
|
bool
|
Whether to force rebuild the images even if they already exist |
False
|
max_workers
|
int
|
Maximum number of workers to use for building images |
4
|
Source code in swebench/harness/docker_build.py
305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 |
|
build_instance_image
build_instance_image(test_spec: TestSpec, client: DockerClient, logger: Logger | None, nocache: bool)
Builds the instance image for the given test spec if it does not already exist.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
test_spec
|
TestSpec
|
Test spec to build the instance image for |
required |
client
|
DockerClient
|
Docker client to use for building the image |
required |
logger
|
Logger
|
Logger to use for logging the build process |
required |
nocache
|
bool
|
Whether to bypass the cache when building (True disables the build cache) |
required |
Source code in swebench/harness/docker_build.py
362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 |
|
build_container
build_container(test_spec: TestSpec, client: DockerClient, run_id: str, logger: Logger, nocache: bool, force_rebuild: bool = False)
Builds the instance image for the given test spec and creates a container from the image.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
test_spec
|
TestSpec
|
Test spec to build the instance image and container for |
required |
client
|
DockerClient
|
Docker client for building image + creating the container |
required |
run_id
|
str
|
Run ID identifying process, used for the container name |
required |
logger
|
Logger
|
Logger to use for logging the build process |
required |
nocache
|
bool
|
Whether to bypass the cache when building (True disables the build cache) |
required |
force_rebuild
|
bool
|
Whether to force rebuild the image even if it already exists |
False
|
Source code in swebench/harness/docker_build.py
433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 |
|
docker_utils
HEREDOC_DELIMITER
module-attribute
HEREDOC_DELIMITER = 'EOF_1399519320'
copy_to_container
copy_to_container(container: Container, src: Path, dst: Path)
Copy a file from local to a docker container
Parameters:
Name | Type | Description | Default |
---|---|---|---|
container
|
Container
|
Docker container to copy to |
required |
src
|
Path
|
Source file path |
required |
dst
|
Path
|
Destination file path in the container |
required |
Source code in swebench/harness/docker_utils.py
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
|
write_to_container
write_to_container(container: Container, data: str, dst: Path)
Write a string to a file in a docker container
Source code in swebench/harness/docker_utils.py
54 55 56 57 58 59 60 |
|
remove_image
remove_image(client, image_id, logger=None)
Remove a Docker image by ID.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
client
|
DockerClient
|
Docker client. |
required |
image_id
|
str
|
Image ID. |
required |
rm_image
|
bool
|
Whether to remove the image. |
required |
logger
|
Logger
|
Logger to use for output. If None, print to stdout. |
None
|
Source code in swebench/harness/docker_utils.py
63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
|
cleanup_container
cleanup_container(client, container, logger)
Stop and remove a Docker container. Performs this forcefully if the container cannot be stopped with the python API.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
client
|
DockerClient
|
Docker client. |
required |
container
|
Container
|
Container to remove. |
required |
logger
|
Logger
|
Logger to use for output. If None, print to stdout |
required |
Source code in swebench/harness/docker_utils.py
100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 |
|
exec_run_with_timeout
exec_run_with_timeout(container, cmd, timeout: int | None = 60)
Run a command in a container with a timeout.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
container
|
Container
|
Container to run the command in. |
required |
cmd
|
str
|
Command to run. |
required |
timeout
|
int
|
Timeout in seconds. |
60
|
Source code in swebench/harness/docker_utils.py
175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 |
|
find_dependent_images
find_dependent_images(client: DockerClient, image_name: str)
Find all images that are built upon the image_name image.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
client
|
DockerClient
|
Docker client. |
required |
image_name
|
str
|
Name of the base image. |
required |
Source code in swebench/harness/docker_utils.py
220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 |
|
list_images
list_images(client: DockerClient)
List all images from the Docker client.
Source code in swebench/harness/docker_utils.py
258 259 260 261 262 263 |
|
clean_images
clean_images(client: DockerClient, prior_images: set, cache_level: str, clean: bool)
Clean Docker images based on cache level and clean flag.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
client
|
DockerClient
|
Docker client. |
required |
prior_images
|
set
|
Set of images that existed before the current run. |
required |
cache_level
|
str
|
Cache level to use. |
required |
clean
|
bool
|
Whether to clean, i.e. remove images that are higher in the cache hierarchy than the current cache level. E.g. if cache_level is set to env, all previously built instance images are removed. If clean is False, previously built instance images will not be removed, but instance images built in the current run will be removed. |
required |
Source code in swebench/harness/docker_utils.py
266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 |
|
should_remove
should_remove(image_name: str, cache_level: str, clean: bool, prior_images: set)
Determine if an image should be removed based on cache level and clean flag.
Source code in swebench/harness/docker_utils.py
295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 |
|
dockerfiles
__all__
module-attribute
__all__ = ['get_dockerfile_base', 'get_dockerfile_env', 'get_dockerfile_instance']
get_dockerfile_base
get_dockerfile_base(platform, arch, language, **kwargs)
Source code in swebench/harness/dockerfiles/__init__.py
29 30 31 32 33 34 35 36 |
|
get_dockerfile_env
get_dockerfile_env(platform, arch, language, base_image_key, **kwargs)
Source code in swebench/harness/dockerfiles/__init__.py
39 40 41 42 43 44 45 |
|
get_dockerfile_instance
get_dockerfile_instance(platform, language, env_image_name)
Source code in swebench/harness/dockerfiles/__init__.py
48 49 50 51 |
|
javascript
python
grading
test_passed
test_passed(case: str, sm: dict[str, str]) -> bool
Source code in swebench/harness/grading.py
27 28 |
|
test_failed
test_failed(case: str, sm: dict[str, str]) -> bool
Source code in swebench/harness/grading.py
31 32 |
|
get_logs_eval
get_logs_eval(test_spec: TestSpec, log_fp: str) -> tuple[dict[str, str], bool]
Retrieve evaluation results for a task instance from its corresponding log file
Parameters:
Name | Type | Description | Default |
---|---|---|---|
log_fp
|
str
|
path to log file |
required |
Returns: dict: status map (test case to test status); bool: whether the patch applied successfully
TODO(john-b-yang): Check this is working properly...
Source code in swebench/harness/grading.py
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
|
get_eval_tests_report
get_eval_tests_report(eval_status_map: dict[str, str], gold_results: dict[str, str], calculate_to_fail: bool = False, eval_type: EvalType = PASS_AND_FAIL) -> dict[str, dict[str, list[str]]]
Create a report based on failure/pass change from gold results to eval results.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
eval_status_map
|
dict
|
evaluation status map |
required |
gold_results
|
dict
|
gold results |
required |
calculate_to_fail
|
bool
|
whether to calculate metrics for "x to fail" tests |
False
|
Returns: report (dict): report of metrics
Metric Definitions (Gold Result Pair + Eval Result):
- Fail-Pass (F2P) + P: Success (Resolution)
- Pass-Pass (P2P) + P: Success (Maintenance)
- Fail-Pass (F2P) + F: Failure
- Pass-Pass (P2P) + F: Failure
Miscellaneous Definitions:
- Fail-Fail (F2F) + F: Failure Maintenance
- Pass-Fail (P2F) + F: Not considered
- Fail-Fail (F2F) + P: Success (Extra Credit)
- Pass-Fail (P2F) + P: Not considered
Source code in swebench/harness/grading.py
80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 |
|
compute_fail_to_pass
compute_fail_to_pass(report: dict[str, dict[str, Any]]) -> float
Compute fail-to-pass metric. Accepts single report as argument.
Source code in swebench/harness/grading.py
180 181 182 183 184 185 186 187 |
|
compute_pass_to_pass
compute_pass_to_pass(report: dict[str, dict[str, Any]]) -> float
Compute pass-to-pass metric. Accepts single report as argument.
Source code in swebench/harness/grading.py
190 191 192 193 194 195 196 197 198 |
|
get_resolution_status
get_resolution_status(report: dict[str, dict[str, Any]]) -> str
Determine resolved status of an evaluation instance
Criteria
- If fail-to-pass (Resolution) = 1 and pass-to-pass (Maintenance) = 1 -> FULL
- If (fail-to-pass (Resolution) < 1 and > 0) and pass-to-pass (Maintenance) = 1 -> PARTIAL
- Otherwise -> NO
Source code in swebench/harness/grading.py
201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 |
|
get_eval_report
get_eval_report(test_spec: TestSpec, prediction: dict[str, str], test_log_path: str, include_tests_status: bool) -> dict[str, Any]
Generate a report of model evaluation results from a prediction, task instance, and evaluation log.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
test_spec
|
dict
|
test spec containing keys "instance_id", "FAIL_TO_PASS", and "PASS_TO_PASS" |
required |
prediction
|
dict
|
prediction containing keys "instance_id", "model_name_or_path", and "model_patch" |
required |
test_log_path
|
str
|
path to evaluation log |
required |
include_tests_status
|
bool
|
whether to include the status of each test in the returned report |
required |
Returns: report (dict): report of metrics
Source code in swebench/harness/grading.py
221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 |
|
log_parsers
MAP_REPO_TO_PARSER
module-attribute
MAP_REPO_TO_PARSER = {None: MAP_REPO_TO_PARSER_JS, None: MAP_REPO_TO_PARSER_PY}
__all__
module-attribute
__all__ = ['MAP_REPO_TO_PARSER']
javascript
MAP_REPO_TO_PARSER_JS
module-attribute
MAP_REPO_TO_PARSER_JS = {'Automattic/wp-calypso': parse_log_calypso, 'chartjs/Chart.js': parse_log_chart_js, 'markedjs/marked': parse_log_marked, 'processing/p5.js': parse_log_p5js, 'diegomura/react-pdf': parse_log_react_pdf}
parse_log_calypso
parse_log_calypso(log: str, test_spec: TestSpec) -> dict[str, str]
Parser for test logs generated by Calypso test suite
Source code in swebench/harness/log_parsers/javascript.py
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
|
parse_log_chart_js
parse_log_chart_js(log: str, test_spec: TestSpec) -> dict[str, str]
Parser for test logs generated by ChartJS test suite
Source code in swebench/harness/log_parsers/javascript.py
58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
|
parse_log_marked
parse_log_marked(log: str, test_spec: TestSpec) -> dict[str, str]
Parser for test logs generated by Marked test suite
Source code in swebench/harness/log_parsers/javascript.py
75 76 77 78 79 80 81 82 83 84 |
|
parse_log_p5js
parse_log_p5js(log_content: str) -> dict[str, str]
Source code in swebench/harness/log_parsers/javascript.py
87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
|
parse_log_react_pdf
parse_log_react_pdf(log: str, test_spec: TestSpec) -> dict[str, str]
Parser for test logs generated by the react-pdf test suite
Source code in swebench/harness/log_parsers/javascript.py
153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 |
|
python
MAP_REPO_TO_PARSER_PY
module-attribute
MAP_REPO_TO_PARSER_PY = {'astropy/astropy': parse_log_astropy, 'django/django': parse_log_django, 'marshmallow-code/marshmallow': parse_log_marshmallow, 'matplotlib/matplotlib': parse_log_matplotlib, 'mwaskom/seaborn': parse_log_seaborn, 'pallets/flask': parse_log_flask, 'psf/requests': parse_log_requests, 'pvlib/pvlib-python': parse_log_pvlib, 'pydata/xarray': parse_log_xarray, 'pydicom/pydicom': parse_log_pydicom, 'pylint-dev/astroid': parse_log_astroid, 'pylint-dev/pylint': parse_log_pylint, 'pytest-dev/pytest': parse_log_pytest, 'pyvista/pyvista': parse_log_pyvista, 'scikit-learn/scikit-learn': parse_log_scikit, 'sqlfluff/sqlfluff': parse_log_sqlfluff, 'sphinx-doc/sphinx': parse_log_sphinx, 'sympy/sympy': parse_log_sympy}
parse_log_pytest
parse_log_pytest(log: str, test_spec: TestSpec) -> dict[str, str]
Parser for test logs generated with PyTest framework
Parameters:
Name | Type | Description | Default |
---|---|---|---|
log
|
str
|
log content |
required |
Returns: dict: test case to test status mapping
Source code in swebench/harness/log_parsers/python.py
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
|
parse_log_pytest_options
parse_log_pytest_options(log: str, test_spec: TestSpec) -> dict[str, str]
Parser for test logs generated with PyTest framework with options
Parameters:
Name | Type | Description | Default |
---|---|---|---|
log
|
str
|
log content |
required |
Returns: dict: test case to test status mapping
Source code in swebench/harness/log_parsers/python.py
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
|
parse_log_django
parse_log_django(log: str, test_spec: TestSpec) -> dict[str, str]
Parser for test logs generated with Django tester framework
Parameters:
Name | Type | Description | Default |
---|---|---|---|
log
|
str
|
log content |
required |
Returns: dict: test case to test status mapping
Source code in swebench/harness/log_parsers/python.py
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
|
parse_log_pytest_v2
parse_log_pytest_v2(log: str, test_spec: TestSpec) -> dict[str, str]
Parser for test logs generated with PyTest framework (Later Version)
Parameters:
Name | Type | Description | Default |
---|---|---|---|
log
|
str
|
log content |
required |
Returns: dict: test case to test status mapping
Source code in swebench/harness/log_parsers/python.py
144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 |
|
parse_log_seaborn
parse_log_seaborn(log: str, test_spec: TestSpec) -> dict[str, str]
Parser for test logs generated with seaborn testing framework
Parameters:
Name | Type | Description | Default |
---|---|---|---|
log
|
str
|
log content |
required |
Returns: dict: test case to test status mapping
Source code in swebench/harness/log_parsers/python.py
173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 |
|
parse_log_sympy
parse_log_sympy(log: str, test_spec: TestSpec) -> dict[str, str]
Parser for test logs generated with Sympy framework
Parameters:
Name | Type | Description | Default |
---|---|---|---|
log
|
str
|
log content |
required |
Returns: dict: test case to test status mapping
Source code in swebench/harness/log_parsers/python.py
199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 |
|
parse_log_matplotlib
parse_log_matplotlib(log: str, test_spec: TestSpec) -> dict[str, str]
Parser for test logs generated with PyTest framework
Parameters:
Name | Type | Description | Default |
---|---|---|---|
log
|
str
|
log content |
required |
Returns: dict: test case to test status mapping
Source code in swebench/harness/log_parsers/python.py
229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 |
|
modal_eval
__all__
module-attribute
__all__ = ['run_instances_modal', 'validate_modal_credentials']
run_instances_modal
run_instances_modal(predictions: dict, instances: list, full_dataset: list, run_id: str, timeout: int)
Run all instances for the given predictions on Modal.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
predictions
|
dict
|
Predictions dict generated by the model |
required |
instances
|
list
|
List of instances |
required |
run_id
|
str
|
Run ID |
required |
timeout
|
int
|
Timeout for running tests |
required |
Source code in swebench/harness/modal_eval/run_evaluation_modal.py
389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 |
|
validate_modal_credentials
validate_modal_credentials()
Validate that Modal credentials exist by checking for ~/.modal.toml file. Raises an exception if credentials are not configured.
Source code in swebench/harness/modal_eval/utils.py
4 5 6 7 8 9 10 11 12 13 14 |
|
run_evaluation_modal
SANDBOX_ENTRYPOINT
module-attribute
SANDBOX_ENTRYPOINT = 'run_evaluation_modal_entrypoint'
LOCAL_SANDBOX_ENTRYPOINT_PATH
module-attribute
LOCAL_SANDBOX_ENTRYPOINT_PATH = resolve()
REMOTE_SANDBOX_ENTRYPOINT_PATH
module-attribute
REMOTE_SANDBOX_ENTRYPOINT_PATH = f'/root/{SANDBOX_ENTRYPOINT}.py'
app
module-attribute
app = App('swebench-evaluation')
swebench_image
module-attribute
swebench_image = pip_install('swebench', 'tenacity')
TestOutput
dataclass
TestOutput(instance_id: str, test_output: str, report_json_str: str, run_instance_log: str, patch_diff: str, log_dir: Path, errored: bool)
instance_id
instance-attribute
instance_id: str
test_output
instance-attribute
test_output: str
report_json_str
instance-attribute
report_json_str: str
run_instance_log
instance-attribute
run_instance_log: str
patch_diff
instance-attribute
patch_diff: str
log_dir
instance-attribute
log_dir: Path
errored
instance-attribute
errored: bool
ModalSandboxRuntime
ModalSandboxRuntime(test_spec: TestSpec, timeout: int | None = None, verbose: bool = True)
Runtime for running instances in a Modal Sandbox.
Source code in swebench/harness/modal_eval/run_evaluation_modal.py
56 57 58 59 60 61 62 63 64 65 66 |
|
test_spec
instance-attribute
test_spec = test_spec
sandbox
instance-attribute
sandbox = _get_sandbox(timeout)
verbose
instance-attribute
verbose = verbose
write_file
write_file(file_path: str, content: str)
Source code in swebench/harness/modal_eval/run_evaluation_modal.py
116 117 |
|
exec
exec(command: str) -> tuple[str, int]
Execute a command in the sandbox.
Returns:
Type | Description |
---|---|
tuple[str, int]
|
tuple[str, int]: Sandbox output and return code. |
Source code in swebench/harness/modal_eval/run_evaluation_modal.py
119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
|
__exit__
__exit__(exc_type, exc_val, exc_tb)
Source code in swebench/harness/modal_eval/run_evaluation_modal.py
139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
|
get_instance_image
staticmethod
get_instance_image(test_spec: TestSpec) -> Image
Source code in swebench/harness/modal_eval/run_evaluation_modal.py
159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 |
|
get_log_dir
get_log_dir(pred: dict, run_id: str, instance_id: str) -> Path
Source code in swebench/harness/modal_eval/run_evaluation_modal.py
213 214 215 216 217 |
|
run_instance_modal
run_instance_modal(test_spec: TestSpec, pred: dict, run_id: str, timeout: int | None = None) -> TestOutput
Run a single instance with the given prediction.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
test_spec
|
TestSpec
|
TestSpec instance |
required |
pred
|
dict
|
Prediction w/ model_name_or_path, model_patch, instance_id |
required |
run_id
|
str
|
Run ID |
required |
timeout
|
int
|
Timeout for running tests |
None
|
Source code in swebench/harness/modal_eval/run_evaluation_modal.py
220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 |
|
run_instances_modal
run_instances_modal(predictions: dict, instances: list, full_dataset: list, run_id: str, timeout: int)
Run all instances for the given predictions on Modal.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
predictions
|
dict
|
Predictions dict generated by the model |
required |
instances
|
list
|
List of instances |
required |
run_id
|
str
|
Run ID |
required |
timeout
|
int
|
Timeout for running tests |
required |
Source code in swebench/harness/modal_eval/run_evaluation_modal.py
389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 |
|
run_evaluation_modal_entrypoint
STDIO_RATE_LIMIT_BYTES_PER_SEC
module-attribute
STDIO_RATE_LIMIT_BYTES_PER_SEC = 64 * 1024 // 2
parser
module-attribute
parser = ArgumentParser(description='Execute a shell command and stream output')
args
module-attribute
args = parse_args()
exec
async
exec(command: str) -> int
Source code in swebench/harness/modal_eval/run_evaluation_modal_entrypoint.py
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
|
main
async
main(command: str)
Source code in swebench/harness/modal_eval/run_evaluation_modal_entrypoint.py
111 112 113 |
|
utils
validate_modal_credentials
validate_modal_credentials()
Validate that Modal credentials exist by checking for ~/.modal.toml file. Raises an exception if credentials are not configured.
Source code in swebench/harness/modal_eval/utils.py
4 5 6 7 8 9 10 11 12 13 14 |
|
prepare_images
parser
module-attribute
parser = ArgumentParser()
args
module-attribute
args = parse_args()
filter_dataset_to_build
filter_dataset_to_build(dataset: list, instance_ids: list | None, client: DockerClient, force_rebuild: bool, namespace: str = None, tag: str = None)
Filter the dataset to only include instances that need to be built.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataset
|
list
|
List of instances (usually all of SWE-bench dev/test split) |
required |
instance_ids
|
list
|
List of instance IDs to build. |
required |
client
|
DockerClient
|
Docker client. |
required |
force_rebuild
|
bool
|
Whether to force rebuild all images. |
required |
Source code in swebench/harness/prepare_images.py
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
|
main
main(dataset_name, split, instance_ids, max_workers, force_rebuild, open_file_limit, namespace, tag)
Build Docker images for the specified instances.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
instance_ids
|
list
|
List of instance IDs to build. |
required |
max_workers
|
int
|
Number of workers for parallel processing. |
required |
force_rebuild
|
bool
|
Whether to force rebuild all images. |
required |
open_file_limit
|
int
|
Open file limit. |
required |
Source code in swebench/harness/prepare_images.py
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
|
remove_containers
parser
module-attribute
parser = ArgumentParser(description=__doc__)
args
module-attribute
args = parse_args()
instance_ids
module-attribute
instance_ids = [strip() for i in split(',')] if instance_ids else []
main
main(instance_ids, predictions_path)
Source code in swebench/harness/remove_containers.py
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
|
reporting
make_run_report
make_run_report(predictions: dict, full_dataset: list, run_id: str, client: Optional[DockerClient] = None) -> Path
Make a final evaluation and run report of the instances that have been run. Also reports on images and containers that may still be running if client is provided.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
predictions
|
dict
|
Predictions dict generated by the model |
required |
full_dataset
|
list
|
List of all instances |
required |
run_id
|
str
|
Run ID |
required |
client
|
DockerClient
|
Docker client (optional) |
None
|
Returns:
Type | Description |
---|---|
Path
|
Path to report file |
Source code in swebench/harness/reporting.py
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
|
run_evaluation
GIT_APPLY_CMDS
module-attribute
GIT_APPLY_CMDS = ['git apply --verbose', 'git apply --verbose --reject', 'patch --batch --fuzz=5 -p1 -i']
parser
module-attribute
parser = ArgumentParser(description='Run evaluation harness for the given dataset and predictions.', formatter_class=ArgumentDefaultsHelpFormatter)
args
module-attribute
args = parser.parse_args()
run_instance
run_instance(test_spec: TestSpec, pred: dict, rm_image: bool, force_rebuild: bool, client: DockerClient, run_id: str, timeout: int | None = None, rewrite_reports: bool = False)
Run a single instance with the given prediction.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
test_spec
|
TestSpec
|
TestSpec instance |
required |
pred
|
dict
|
Prediction w/ model_name_or_path, model_patch, instance_id |
required |
rm_image
|
bool
|
Whether to remove the image after running |
required |
force_rebuild
|
bool
|
Whether to force rebuild the image |
required |
client
|
DockerClient
|
Docker client |
required |
run_id
|
str
|
Run ID |
required |
timeout
|
int
|
Timeout for running tests |
None
|
rewrite_reports
|
bool
|
True if eval run is just to reformat existing report |
False
|
Source code in swebench/harness/run_evaluation.py
68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 |
|
run_instances
run_instances(predictions: dict, instances: list, cache_level: str, clean: bool, force_rebuild: bool, max_workers: int, run_id: str, timeout: int, namespace: str = 'swebench', instance_image_tag: str = 'latest', rewrite_reports: bool = False)
Run all instances for the given predictions in parallel.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
predictions
|
dict
|
Predictions dict generated by the model |
required |
instances
|
list
|
List of instances |
required |
cache_level
|
str
|
Cache level |
required |
clean
|
bool
|
Clean images above cache level |
required |
force_rebuild
|
bool
|
Force rebuild images |
required |
max_workers
|
int
|
Maximum number of workers |
required |
run_id
|
str
|
Run ID |
required |
timeout
|
int
|
Timeout for running tests |
required |
Source code in swebench/harness/run_evaluation.py
265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 |
|
get_dataset_from_preds
get_dataset_from_preds(dataset_name: str, split: str, instance_ids: list, predictions: dict, run_id: str, rewrite_reports: bool, exclude_completed: bool = True)
Return only instances that have predictions and are in the dataset. If instance_ids is provided, only return instances with those IDs. If exclude_completed is True, only return instances that have not been run yet.
Source code in swebench/harness/run_evaluation.py
341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 |
|
main
main(dataset_name: str, split: str, instance_ids: list, predictions_path: str, max_workers: int, force_rebuild: bool, cache_level: str, clean: bool, open_file_limit: int, run_id: str, timeout: int, namespace: str | None, rewrite_reports: bool, modal: bool, instance_image_tag: str = 'latest', report_dir: str = '.')
Run evaluation harness for the given dataset and predictions.
Source code in swebench/harness/run_evaluation.py
441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 |
|
test_spec
__all__
module-attribute
__all__ = ['test_spec', 'create_scripts', 'javascript', 'python']
create_scripts
make_repo_script_list
make_repo_script_list(specs, repo, repo_directory, base_commit, env_name) -> list
Create a list of bash commands to set up the repository for testing. This is the setup script for the instance image.
Source code in swebench/harness/test_spec/create_scripts.py
14 15 16 17 18 19 20 21 22 23 24 |
|
make_env_script_list
make_env_script_list(instance, specs, env_name) -> list
Creates the list of commands to set up the environment for testing. This is the setup script for the environment image.
Source code in swebench/harness/test_spec/create_scripts.py
27 28 29 30 31 32 33 34 35 36 37 |
|
make_eval_script_list
make_eval_script_list(instance, specs, env_name, repo_directory, base_commit, test_patch) -> list
Applies the test patch and runs the tests.
Source code in swebench/harness/test_spec/create_scripts.py
40 41 42 43 44 45 46 47 48 49 50 51 |
|
javascript
MAP_REPO_TO_TEST_CMDS
module-attribute
MAP_REPO_TO_TEST_CMDS = {'Automattic/wp-calypso': get_test_cmds_calypso}
get_test_cmds_calypso
get_test_cmds_calypso(instance) -> list
Source code in swebench/harness/test_spec/javascript.py
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
|
get_test_cmds
get_test_cmds(instance) -> list
Source code in swebench/harness/test_spec/javascript.py
71 72 73 74 75 76 77 |
|
get_download_img_commands
get_download_img_commands(instance) -> list
Source code in swebench/harness/test_spec/javascript.py
83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
|
make_repo_script_list_js
make_repo_script_list_js(specs, repo, repo_directory, base_commit, env_name) -> list
Create a list of bash commands to set up the repository for testing. This is the setup script for the instance image.
Source code in swebench/harness/test_spec/javascript.py
102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
|
make_env_script_list_js
make_env_script_list_js(instance, specs, env_name) -> list
Creates the list of commands to set up the environment for testing. This is the setup script for the environment image.
Source code in swebench/harness/test_spec/javascript.py
122 123 124 125 126 127 128 129 130 131 132 133 |
|
make_eval_script_list_js
make_eval_script_list_js(instance, specs, env_name, repo_directory, base_commit, test_patch) -> list
Applies the test patch and runs the tests.
Source code in swebench/harness/test_spec/javascript.py
136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 |
|
python
HEADERS
module-attribute
HEADERS = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
get_environment_yml_by_commit
cached
get_environment_yml_by_commit(repo: str, commit: str, env_name: str) -> str
Source code in swebench/harness/test_spec/python.py
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
|
get_environment_yml
get_environment_yml(instance: SWEbenchInstance, env_name: str) -> str
Get environment.yml for given task instance
Parameters:
Name | Type | Description | Default |
---|---|---|---|
instance
|
dict
|
SWE Bench Task instance |
required |
env_name
|
str
|
Rename retrieved environment.yml to this name |
required |
Returns: environment.yml (str): Returns environment.yml as string
Source code in swebench/harness/test_spec/python.py
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
|
get_requirements_by_commit
cached
get_requirements_by_commit(repo: str, commit: str) -> str
Source code in swebench/harness/test_spec/python.py
69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
|
get_requirements
get_requirements(instance: SWEbenchInstance) -> str
Get requirements.txt for given task instance
Parameters:
Name | Type | Description | Default |
---|---|---|---|
instance
|
dict
|
task instance |
required |
Returns: requirements.txt (str): Returns requirements.txt as string
Source code in swebench/harness/test_spec/python.py
116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
|
get_test_directives
get_test_directives(instance: SWEbenchInstance) -> list
Get test directives from the test_patch of a task instance
Parameters:
Name | Type | Description | Default |
---|---|---|---|
instance
|
dict
|
task instance |
required |
Returns: directives (list): List of test directives
Source code in swebench/harness/test_spec/python.py
135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 |
|
make_repo_script_list_py
make_repo_script_list_py(specs, repo, repo_directory, base_commit, env_name) -> list
Create a list of bash commands to set up the repository for testing. This is the setup script for the instance image.
Source code in swebench/harness/test_spec/python.py
169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 |
|
make_env_script_list_py
make_env_script_list_py(instance, specs, env_name) -> list
Creates the list of commands to set up the conda environment for testing. This is the setup script for the environment image.
Source code in swebench/harness/test_spec/python.py
214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 |
|
make_eval_script_list_py
make_eval_script_list_py(instance, specs, env_name, repo_directory, base_commit, test_patch) -> list
Applies the test patch and runs the tests.
Source code in swebench/harness/test_spec/python.py
281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 |
|
test_spec
TestSpec
dataclass
TestSpec(instance_id: str, repo: str, version: str, repo_script_list: list[str], eval_script_list: list[str], env_script_list: list[str], arch: str, FAIL_TO_PASS: list[str], PASS_TO_PASS: list[str], language: str, docker_specs: dict, namespace: str, base_image_tag: str = LATEST, env_image_tag: str = LATEST, instance_image_tag: str = LATEST)
A dataclass that represents a test specification for a single instance of SWE-bench.
instance_id
instance-attribute
instance_id: str
repo
instance-attribute
repo: str
version
instance-attribute
version: str
repo_script_list
instance-attribute
repo_script_list: list[str]
eval_script_list
instance-attribute
eval_script_list: list[str]
env_script_list
instance-attribute
env_script_list: list[str]
arch
instance-attribute
arch: str
FAIL_TO_PASS
instance-attribute
FAIL_TO_PASS: list[str]
PASS_TO_PASS
instance-attribute
PASS_TO_PASS: list[str]
language
instance-attribute
language: str
docker_specs
instance-attribute
docker_specs: dict
namespace
instance-attribute
namespace: str
setup_env_script
property
setup_env_script
eval_script
property
eval_script
install_repo_script
property
install_repo_script
base_image_key
property
base_image_key
env_image_key
property
env_image_key
The key for the environment image is based on the hash of the environment script list. If the environment script list changes, the image will be rebuilt automatically.
Note that old images are not automatically deleted, so consider cleaning up old images periodically.
instance_image_key
property
instance_image_key
is_remote_image
property
is_remote_image
base_dockerfile
property
base_dockerfile
env_dockerfile
property
env_dockerfile
instance_dockerfile
property
instance_dockerfile
platform
property
platform
get_instance_container_name
get_instance_container_name(run_id=None)
Source code in swebench/harness/test_spec/test_spec.py
107 108 109 110 |
|
get_test_specs_from_dataset
get_test_specs_from_dataset(dataset: Union[list[SWEbenchInstance], list[TestSpec]], namespace: str = None, instance_image_tag: str = LATEST) -> list[TestSpec]
Idempotent function that converts a list of SWEbenchInstance objects to a list of TestSpec objects.
Source code in swebench/harness/test_spec/test_spec.py
145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
|
make_test_spec
make_test_spec(instance: SWEbenchInstance, namespace: str = None, base_image_tag: str = LATEST, env_image_tag: str = LATEST, instance_image_tag: str = LATEST) -> TestSpec
Source code in swebench/harness/test_spec/test_spec.py
163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 |
|
utils
PATCH_PATTERN
module-attribute
PATCH_PATTERN = compile('(?:diff[\\w\\_\\.\\ \\/\\-]+\\n)?\\-\\-\\-\\s+a\\/(?:.*?)\\n\\+\\+\\+\\s+b\\/(?:.*?)(?=diff\\ |\\-\\-\\-\\ a\\/|\\Z)', DOTALL)
PATCH_FILE_PATTERN
module-attribute
PATCH_FILE_PATTERN = compile('\\-\\-\\-\\s+a\\/(?:.+)\\n\\+\\+\\+\\s+b\\/(?:.+)')
PATCH_HUNK_PATTERN
module-attribute
PATCH_HUNK_PATTERN = compile('\\@\\@\\s+\\-(\\d+),(\\d+)\\s+\\+(\\d+),(\\d+)\\s+\\@\\@(.+?)(?=diff\\ |\\-\\-\\-\\ a\\/|\\@\\@\\ \\-|\\Z)', DOTALL)
EvaluationError
EvaluationError(instance_id, message, logger)
Bases: Exception
Source code in swebench/harness/utils.py
25 26 27 28 29 |
|
instance_id
instance-attribute
instance_id = instance_id
log_file
instance-attribute
log_file = log_file
logger
instance-attribute
logger = logger
__str__
__str__()
Source code in swebench/harness/utils.py
31 32 33 34 35 36 37 |
|
get_predictions_from_file
get_predictions_from_file(predictions_path: str, dataset_name: str, split: str)
Source code in swebench/harness/utils.py
40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
|
run_threadpool
run_threadpool(func, payloads, max_workers)
Source code in swebench/harness/utils.py
79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
|
run_sequential
run_sequential(func, args_list)
Run a function with a list of arguments sequentially
Source code in swebench/harness/utils.py
105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
|
load_swebench_dataset
load_swebench_dataset(name='princeton-nlp/SWE-bench', split='test', instance_ids=None) -> list[SWEbenchInstance]
Load SWE-bench dataset from Hugging Face Datasets or local .json/.jsonl file
Source code in swebench/harness/utils.py
124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 |
|
get_first_idx
get_first_idx(charlist)
Get index of first occurrence of "-" or "+" in charlist
Source code in swebench/harness/utils.py
182 183 184 185 186 |
|
get_last_idx
get_last_idx(charlist)
Get index of last occurrence of "-" or "+" in charlist
Source code in swebench/harness/utils.py
189 190 191 192 193 |
|
strip_content
strip_content(hunk)
Remove trailing non +/- lines and trailing whitespace per line per hunk
Source code in swebench/harness/utils.py
196 197 198 199 200 201 202 203 204 205 |
|
get_hunk_stats
get_hunk_stats(pre_start, pre_len, post_start, post_len, hunk, total_delta)
Recalculate hunk start/end position and diff delta
Source code in swebench/harness/utils.py
208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 |
|
extract_minimal_patch
extract_minimal_patch(model_patch)
Wrapper function that takes a patch and:
- Removes trailing non +/- lines and trailing whitespace per line per hunk
- Recalculates hunk start/end position and diff delta
- Returns new patch
Source code in swebench/harness/utils.py
229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 |
|
has_attribute_or_import_error
has_attribute_or_import_error(log_before)
Check to see if Attribute/Import-prefix is in log text
Parameters:
Name | Type | Description | Default |
---|---|---|---|
log_before
|
str
|
Validation log text before patch application |
required |
Source code in swebench/harness/utils.py
259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 |
|
str2bool
str2bool(v)
Minor helper function to convert string to boolean
Source code in swebench/harness/utils.py
290 291 292 293 294 295 296 297 298 299 300 301 |
|
get_repo_file
get_repo_file(repo, commit, filepath)
Source code in swebench/harness/utils.py
304 305 306 307 308 309 310 311 312 |
|
get_modified_files
get_modified_files(patch: str) -> list[str]
Get the list of modified files in a patch
Source code in swebench/harness/utils.py
315 316 317 318 319 320 321 322 323 324 |
|
ansi_escape
ansi_escape(text: str) -> str
Remove ANSI escape sequences from text
Source code in swebench/harness/utils.py
327 328 329 330 331 |
|